Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

classicrebuildfp.cpp

Go to the documentation of this file.
00001 
00009 #include "common.hpp"
00010 #include "common/log.h"
00011 #include "io/input/transaction_reader/TransactionReader.hpp"
00012 #include "fpgrowth/buildfptree.hpp"
00013 
00014 
00015 namespace bracz {
00016 
00018   typedef struct bnode_t { 
00019     bnode_t *parent;
00020     item_t item;
00021     counter_t counter;
00022     bnode_t *headerlink;
00023     bnode_t *firstchild;
00024     bnode_t *nextsibling;
00025   } bnode_t;
00026 
00027 
00028   template<class INPUT, FirstLevel FIRSTLEVEL, bool SINGLE> class ClassicRebuildFPStructure {
00029   private:
00030 
00031   public:
00033     typedef struct fptree_t {
00034       counter_t *freqs;
00035       bnode_t **headertable;
00036       bnode_t *root;
00037       counter_t DINLINE getFrequency(item_t i) {
00038         return freqs[i];
00039       }
00040 
00041       counter_t DINLINE getRootFrequency() {
00042         return root->counter;
00043       }
00044     } fptree_t;
00045 
00046 
00047     counter_t getTransactionCount() {
00048       return transaction_count;
00049     }
00050 
00051   private:
00052 
00054     INPUT *inp;
00055     
00056   protected:
00057 
00058     counter_t minsupp;
00059     fptree_t fulltree;
00060 
00061     std::vector<fptree_t> l1trees;
00062 
00063     // used when per-item trees are built
00064     counter_t transaction_count;
00065 
00067     singleualloc<bnode_t, 10*1024> nodeallocator;
00069     singlesalloc<fptree_t,100> treealloc;
00070 
00071 
00072     typedef blockstack<stackmultiblock<bnode_t*,false,stacksingleblock<counter_t,false> > > treecontentalloc_t;
00073     //these will be unused if operating TD mode
00075     treecontentalloc_t treecontentalloc;
00076 
00077 
00078     template<class ARR> void addTransToTree(const ARR &trans, size_t len, fptree_t *tree, counter_t count) {
00079       bnode_t *bnode=tree->root;
00080       bnode->counter+=count;
00081       for(size_t i=0;i<len;++i) {
00082         register item_t curritem=trans[i];
00083         bnode_t** rnode=&(bnode->firstchild);
00084         bnode_t * nnode=*rnode;
00085         tree->freqs[curritem]+=count;
00086         while (nnode && nnode->item<curritem) {
00087           rnode=&(nnode->nextsibling);
00088           nnode=nnode->nextsibling;
00089         }
00090         if (!nnode || (nnode->item>curritem)) {
00091           //we need to allocate a new node
00092           nnode=nodeallocator.allocate();
00093           nnode->parent=i?bnode:NULL;
00094           nnode->item=curritem;
00095           nnode->counter=count;
00096           nnode->headerlink=tree->headertable[curritem];
00097           tree->headertable[curritem]=nnode;
00098 
00099           nnode->nextsibling=*rnode;
00100           *rnode=nnode;
00101           nnode->firstchild=NULL;
00102           
00103         } else {//nnode->item==curritem
00104           nnode->counter+=count;
00105         }
00106         //traverse to next node
00107         bnode=nnode;
00108       }
00109     }
00110 
00111     void DINLINE inittree(fptree_t &fulltree) {
00112       fulltree.root=nodeallocator.allocate();
00113       fulltree.root->item=-1;
00114       fulltree.root->parent=NULL;
00115       fulltree.root->firstchild=NULL;
00116       fulltree.root->counter=0;
00117       fulltree.root->headerlink=NULL;
00118       fulltree.root->nextsibling=NULL;
00119     }
00120 
00121     void DINLINE allocatetree(fptree_t &fulltree, item_t maxitem) {
00122       alignalloc::allocz(fulltree.freqs,maxitem);
00123       alignalloc::allocz(fulltree.headertable,maxitem);
00124       inittree(fulltree);
00125     }
00126 
00128     void buildTree(item_t maxitem) {
00129       log_status(0,"Building unconditional FP tree.");
00130       //build the trie
00131       std::vector<item_t> trans;
00132       inp->rewind();
00133       //    size_t singlec=0;
00134       transaction_count=0;
00135       allocatetree(fulltree,maxitem);
00136       while (counter_t count=inp->nextTransactionBIS(trans)) {
00137         transaction_count+=count;
00138         addTransToTree(trans,trans.size(),&fulltree,count);
00139       }
00140     }
00141 
00142 
00144     void buildAllL1Trees(item_t maxitem) {
00145       log_status(0,"Initializing FP tree.");
00146       l1trees.resize(maxitem);
00147       for(item_t i=0;i<maxitem;++i) {
00148         allocatetree(l1trees[i],i);
00149       }
00150       log_status(0,"Building 1st conditional FP trees.");
00151       //build the trie
00152       std::vector<item_t> trans;
00153       inp->rewind();
00154       //    size_t singlec=0;
00155       transaction_count=0;
00156       while (counter_t count=inp->nextTransactionBIS(trans)) {
00157         transaction_count+=count;
00158         for (size_t itemidx=0;itemidx<trans.size();++itemidx) {
00159           addTransToTree(trans,itemidx,&l1trees[trans[itemidx]],count);
00160         }
00161       }
00162     }
00163 
00164   public:
00165     class SinglePathIterator {
00166     private:
00167       bnode_t *c;
00168     public:
00169       void DINLINE increment(fptree_t *) {
00170         c=c->parent;
00171       }
00172       bool DINLINE atEnd(fptree_t *) {
00173         return !c;
00174       }
00175       item_t DINLINE getCurrItem(fptree_t *) {
00176         return c->item;
00177       }
00178       SinglePathIterator(bnode_t *_c): c(_c){}
00179     };//class SinglePathIterator
00180 
00181     SinglePathIterator getSinglePathIterator(fptree_t *t,item_t curritem) {
00182       return SinglePathIterator(t->headertable[curritem]);
00183     }
00184 
00192     ClassicRebuildFPStructure(INPUT *_inp, item_t maxitem, counter_t _minsupp):inp(_inp),minsupp(_minsupp) {
00193       temptransaction=new item_t[maxitem];
00194       switch(FIRSTLEVEL) {
00195       case FLBuildSingleTree: {
00196         buildTree(maxitem);
00197         break;
00198       }
00199       case FLBuildAllL1Trees: {
00200         buildAllL1Trees(maxitem);
00201         break;
00202       }
00203       case FLSimultProject: {
00204         log_err(0,"Simultaneous projection is not implemented in ClassicFpTree yet.");
00205         exit(1);
00206         break;
00207       }
00208       }
00209     }
00210     
00211     ~ClassicRebuildFPStructure() {
00212       delete [] temptransaction;
00213     }
00214 
00215     fptree_t * getFullTree() {
00216       if (FIRSTLEVEL != FLBuildSingleTree) {
00217         log_err(0,"Requested full tree when per-item trees are calculated!");
00218         return NULL;
00219       } else 
00220         return &fulltree;
00221     }
00222 
00223     fptree_t * getProjTree(item_t item) {
00224       if (FIRSTLEVEL == FLBuildSingleTree) {
00225         log_err(0,"Requested per-item tree when only unconditional tree is calculated!");
00226         return NULL;
00227       } else 
00228         return &(l1trees[item]);
00229     }
00230 
00231 
00232     item_t DINLINE checkSinglePath(fptree_t *t, item_t curritem, item_t spdepth) {
00233       if (!SINGLE) {
00234         return 0;
00235       } else {
00236         while ((spdepth<curritem) && ((!t->headertable[spdepth]) || (!t->headertable[spdepth]->headerlink)))
00237           spdepth++;
00238         return spdepth;
00239       }
00240     }
00241 
00242     template <class O_M> void DINLINE handleSinglePath(fptree_t *t, item_t curritem, O_M *out) {
00243     
00244     }
00245 
00246     void DINLINE zeroDataDense(fptree_t *intr, item_t curritem) {
00247       //partial erase on the `data' part of the tree
00248       alignalloc::zerola(intr->freqs,curritem);
00249       //zero the counters above the current item
00250       for (item_t i=0;i<curritem;++i) {
00251         for(bnode_t *cnode=intr->headertable[i];cnode;cnode=cnode->headerlink) {
00252           cnode->counter=0;
00253         }
00254       }
00255     }
00256 
00257     /*    void DINLINE aggregateDense(fptree_t *intr, item_t curritem) {
00258       for (bnode_t *cnode=intr->headertable[curritem];cnode;cnode=cnode->headerlink) {
00259         counter_t c=cnode->counter;
00260         intr->rootfreq+=c;
00261         bnode_t *nnode=cnode->parent;
00262         while (nnode) {
00263           nnode->counter+=c;
00264           intr->freqs[nnode->item]+=c;
00265           nnode=nnode->parent;
00266         }
00267       }
00268       
00269       }*/
00270 
00271     item_t* temptransaction;
00272 
00273     fptree_t * DINLINE projectTree(fptree_t *intr,item_t curritem) {
00274       /*if (TD) {
00275         zeroDataDense(intr,curritem);
00276         aggregateDense(intr,curritem);
00277         return intr;
00278         } else {*/
00279       fptree_t *nftree = treealloc.allocate();
00280       pair<bnode_t**, counter_t*> als;
00281       treecontentalloc.pushAndGetBlock(curritem,als);
00282       nftree->freqs=als.second;
00283       alignalloc::zero(nftree->freqs,curritem);
00284       nftree->headertable=als.first;
00285       alignalloc::zero(nftree->headertable,curritem);
00286       inittree(*nftree);
00287       
00288       for (bnode_t *cnode=intr->headertable[curritem];cnode;cnode=cnode->headerlink) {
00289         counter_t c=cnode->counter;
00290         bnode_t *nnode=cnode->parent;
00291         item_t *ptr=temptransaction+curritem;
00292         while (nnode) {
00293           if (intr->freqs[nnode->item]>=minsupp) {
00294             --ptr;
00295             *ptr=nnode->item;
00296           }
00297           nnode=nnode->parent;
00298         }
00299         addTransToTree(ptr,temptransaction+curritem-ptr,nftree,c);
00300       }
00301 
00302       return nftree;
00303     }
00304   
00305 
00306     void DINLINE deallocTree(fptree_t *t, fptree_t *parent, item_t projitem) {
00307       for (item_t i=0;i<projitem;++i) {
00308         for(bnode_t *cnode=t->headertable[i];cnode;cnode=cnode->headerlink) {
00309           nodeallocator.free(cnode);
00310         }
00311       }
00312       nodeallocator.free(t->root);
00313       treecontentalloc.freeBlock();
00314       treealloc.free(t);
00315     }
00316 
00317   }; //class ClassicRebuildFPStructure
00318 
00319 
00320 }//namespace bracz

Generated on Sun Sep 17 17:50:38 2006 for FIM environment by  doxygen 1.4.4