Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

nonordfp.cpp

Go to the documentation of this file.
00001 
00009 #include "common.hpp"
00010 #include "common/log.h"
00011 #include "io/input/transaction_reader/TransactionReader.hpp"
00012 #include "fpgrowth/buildfptree.hpp"
00013 
00014 //#define PREFETCH
00015 static const int PREFETCHDIST=64;
00016 
00017 namespace bracz {
00018 
00019 
00020 
00021 
00031   template<class INPUT,class BUILDTREE, FirstLevel FIRSTLEVEL> class NonOrdFPStructure {
00032   public:
00034     typedef uint32_t index_t;
00035 
00036     typedef struct fptree_t {
00038       index_t *itemstarts;
00040       index_t *parents;
00041 
00043       counter_t *freqs;
00045       counter_t *counters;
00046 
00047       enum freeelement_t {
00048         free_none=0,
00049         free_data=1,
00050         free_structure=2
00051       };
00053       int freeelements;
00054 
00055 
00056       fptree_t() {
00057         itemstarts=NULL;
00058         counters=NULL;
00059         parents=NULL;
00060         freqs=NULL;
00061         freeelements=free_none;
00062       }
00063 
00064       counter_t DINLINE getFrequency(item_t i) {
00065         return freqs[i];
00066       }
00067 
00068       counter_t DINLINE getRootFrequency() {
00069         return counters[0];
00070       }
00071     } fptree_t;
00072   
00073     counter_t getTransactionCount() {
00074       return transaction_count;
00075     }
00076 
00077   private:
00078 
00079 
00081     INPUT *inp;
00082 
00083   protected:
00084 
00085     fptree_t fulltree;
00086 
00087     std::vector<fptree_t> l1trees;
00088 
00089     // used when per-item trees are built
00090     counter_t transaction_count;
00091 
00092 
00094     class TreeCopy {
00095     public:
00097 
00101       std::vector<index_t> nextfreenode;
00103 
00110       fptree_t *target;
00111 
00112       void reccopytree(TYPENAME BUILDTREE::nodeptr_t node, index_t nodeidx) {
00113         target->counters[nodeidx]=node->getCounter();
00114         for(TYPENAME BUILDTREE::nodeiter_t i=node->childrenBegin();
00115             i!=node->childrenEnd();
00116             ++i) {
00117           register index_t newidx=nextfreenode[(*i).first]++;
00118           target->parents[newidx]=nodeidx;
00119           target->freqs[(*i).first]+=(*i).second->getCounter();
00120           reccopytree(TYPENAME BUILDTREE::nodeptr_t((*i).second),newidx);
00121         }
00122       }
00123 
00124     };//class TreeCopy
00125 
00132     class TreeAnnotation {
00133     protected:
00134       std::vector<counter_t> nodesperitem;
00135 
00136     public:
00137       inline void DINLINE initTree(item_t maxitem) {
00138         nodesperitem.resize(maxitem,0);
00139       }
00140 
00141       inline void DINLINE onNewTreeNode(item_t curritem,counter_t /*count*/) {
00142         nodesperitem[curritem]++;
00143       }
00144     
00145 
00146       template<class BUILDTREE_> inline void DINLINE buildTreeToFinalTree(BUILDTREE_ & btr, typename BUILDTREE_::buildtree_t &tree, fptree_t &fulltree, item_t maxitem) {
00147         //      log_dbg(0,"nodes per item:");
00148         //calculate and fill itemstarts array
00149         alignalloc::alloc(fulltree.itemstarts,maxitem+1);
00150         alignalloc::allocz(fulltree.freqs,maxitem);
00151         index_t last=1;//node 0 is the root
00152         for(size_t i=0;i<maxitem;++i) {
00153           fulltree.itemstarts[i]=last;
00154           //        fprintf(stderr,"(%d=%d) ",i,nodesperitem[i]);
00155           last+=nodesperitem[i];
00156         }
00157         //fprintf(stderr,"\n");
00158         fulltree.itemstarts[maxitem]=last;
00159         //    log_info(0,"Build FP-tree: nodes %d, multiples %d",last,singlec);
00160         //log_info(0,"Build FP-tree: nodes %d",last);
00161         //compact the trie, copy data to the final structure
00162         //log_status(0,"Creating unconditional compact FP tree.");
00163         alignalloc::alloc(fulltree.parents,last);
00164         alignalloc::alloc(fulltree.counters,last);
00165         TreeCopy tc;
00166         tc.nextfreenode.assign(fulltree.itemstarts, fulltree.itemstarts+maxitem);
00167         tc.target=&fulltree;
00168         tc.reccopytree((tree.getRoot()),0);
00169         fprintf(stderr,"[%d %d %d] ",maxitem,fulltree.counters[0],last);
00170         //log_status(0,"done root counter=%d",fulltree.counters[0]);
00171       }
00172     }; //class TreeAnnotation
00174     void buildTree(item_t maxitem) {
00175       TreeAnnotation treeann;
00176       BUILDTREE btr;
00177       typename BUILDTREE::buildtree_t buildtree;
00178       btr.initBuildTree(buildtree,maxitem,treeann);
00179 
00180       log_status(0,"Building temporary FP tree.");
00181       //build the trie
00182       std::vector<item_t> trans;
00183       inp->rewind();
00184       //    size_t singlec=0;
00185       transaction_count=0;
00186       while (counter_t count=inp->nextTransactionBIS(trans)) {
00187         transaction_count+=count;
00188         btr.addTransToTree(trans,trans.size(),buildtree,count,treeann);
00189       }
00190       treeann.buildTreeToFinalTree(btr,buildtree,fulltree,maxitem);
00191     }
00192 
00194     void buildAllL1Trees(item_t maxitem) {
00195       BUILDTREE btr;
00196       // holds the number of nodes for each item id
00197       std::vector<TreeAnnotation> treeannotations;
00198       std::vector<typename BUILDTREE::buildtree_t> trees;
00199       trees.resize(maxitem);
00200       treeannotations.resize(maxitem);
00201       log_status(0,"Initializing FP tree.");
00202       for(item_t i=0;i<maxitem;++i) {
00203         btr.initBuildTree(trees[i],i,treeannotations[i]);
00204       }
00205       log_status(0,"Building temporary FP trees.");
00206       //build the trie
00207       std::vector<item_t> trans;
00208       inp->rewind();
00209       //    size_t singlec=0;
00210       transaction_count=0;
00211       while (counter_t count=inp->nextTransactionBIS(trans)) {
00212         transaction_count+=count;
00213         for (size_t itemidx=0;itemidx<trans.size();++itemidx) {
00214           btr.addTransToTree(trans,itemidx,trees[trans[itemidx]],count,treeannotations[trans[itemidx]]);
00215         }
00216       }
00217       l1trees.resize(maxitem);
00218       for(item_t i=0;i<maxitem;++i) {
00219         treeannotations[i].buildTreeToFinalTree(btr,trees[i],l1trees[i],i);
00220       }
00221       fprintf(stderr,"\n");
00222     }
00223           
00224 
00225 
00226     class SimultProject {
00227 
00228       typedef struct simultprojnode_t {
00229         item_t parentitem; //NOTE this holds 'item+1', item 0 is reserved for the root
00230         index_t parentoffset;
00231         counter_t counter;
00232         simultprojnode_t *next;
00233       } simultprojnode_t __attribute__ ((aligned (16)));;
00234 
00235       bracz::blockalloc<simultprojnode_t,100000> tempnodealloc;
00236           
00237 
00238       typedef struct temptrie_t {
00239       public:
00240         bracz::maxvector<simultprojnode_t*> nodes;
00241       } temptrie_t; //struct temptrie_t
00242 
00243       //the current path in the recursion
00244       std::vector<item_t> curritemset;
00245 
00246       //this holds the unassembled temporary nonord fp trees
00247       bracz::maxvector<temptrie_t> temptrees;
00248 
00249       //parent
00250       NonOrdFPStructure *parent;
00251         
00252       class counterAddCombine {
00253       public:
00254         inline static void DINLINE combine (counter_t &dest, const counter_t &src) {
00255           dest+=src;
00256         }
00257       };
00258 
00259       template <typename T, class CombineFn> class itemlist_t {
00260       private:
00261         std::vector<item_t> itemlist;
00262         T* elements;
00263         uint32_t maxitem;
00264 
00265       public:
00266         itemlist_t(): elements(NULL){}
00267         
00268         inline void DINLINE init(uint32_t _maxitem){
00269           maxitem=_maxitem;
00270           itemlist.reserve(maxitem);
00271           alignalloc::allocz(elements,maxitem);
00272         }
00273 
00274         ~itemlist_t() {
00275           alignalloc::freeifnonnull(elements);
00276         }
00277 
00278         inline T& DINLINE getItem(item_t i) {
00279           return elements[i];
00280         }
00281 
00282         inline void DINLINE setItem(item_t i,T element) {
00283           if (!getItem(i)) {
00284             itemlist.push_back(i);
00285           }
00286           CombineFn::combine(elements[i],element);
00287         }
00288 
00289         inline void DINLINE combine(const itemlist_t &o) {
00290           for(index_t idx=0;idx<o.itemlist.size();++idx) {
00291             item_t i=o.itemlist[idx];
00292             setItem(i,o.elements[i]);
00293           }
00294         }
00295 
00296         inline void DINLINE clear() {
00297           // **** may clear with memory zero routine
00298           if (true) {
00299             for(index_t idx=0;idx<itemlist.size();++idx) {
00300               item_t i=itemlist[idx];
00301               elements[i]=T();
00302             }
00303           } else {
00304             alignalloc::zero(elements,maxitem);
00305           }
00306           itemlist.clear();
00307         }
00308 
00309         class iterator {
00310         private:
00311           itemlist_t *parent;
00312           uint32_t idx;
00313         public:
00314           iterator(itemlist_t *p,uint32_t i):parent(p),idx(i){
00315           }
00316           iterator(const iterator &o):parent(o.parent),idx(o.idx) {
00317           }
00318           inline bool DINLINE operator==(const iterator&o) const {
00319             return (idx==o.idx)&&(parent==o.parent);
00320           }
00321           inline bool DINLINE operator!=(const iterator&o) const {
00322             return (idx!=o.idx)||(parent!=o.parent);
00323           }
00324           inline iterator& operator++() {
00325             ++idx;
00326             return *this;
00327           }
00328           inline std::pair<item_t, T> operator*() const {
00329             item_t i=parent->itemlist[idx];
00330             return std::make_pair(i,parent->elements[i]);
00331           }
00332 
00333         }; //class iterator
00334         friend class iterator;
00335         iterator begin() {
00336           return iterator(this,0);
00337         }
00338         iterator end() {
00339           return iterator(this,itemlist.size());
00340         }
00341       }; //class itemlist_t
00342 
00343       typedef itemlist_t<counter_t,counterAddCombine> myitemlist_t;  
00344 
00345       bracz::maxvector<myitemlist_t> itemlists;
00346       //a 2-dim array of node pointers for the current node ptr along the recursion. It is indexed with item ids
00347       bracz::maxvector<simultprojnode_t**> recursenodes;  
00348 
00349       item_t maxitem;
00350 
00351       size_t totnodecount;
00352 
00353       void simultRecurse(TYPENAME BUILDTREE::nodeptr_t node) {
00354         index_t idx=curritemset.size()-1;
00355         itemlists[idx].clear();
00356                                                        // **** might be better to post-zero the existing nodes
00357         //union children's list into my list
00358         for(TYPENAME BUILDTREE::nodeiter_t i=node->childrenBegin();
00359             i!=node->childrenEnd();
00360             ++i) {
00361           item_t childitem = (*i).first;
00362           TYPENAME BUILDTREE::nodeptr_t childp((*i).second);
00363           //fprintf(stderr,"child %d, counter %d (",childitem,(*childp).getCounter());
00364           curritemset.push_back(childitem+1);
00365 
00366           //insert the child's item into the itemlists of all its parent nodes
00367           //first look how much we need to go upwards
00368           index_t pidx=idx;
00369           while (!recursenodes[pidx][childitem])//watch: recursenodes[0] is initialized with the root node of each tree
00370             --pidx;
00371           ++pidx;//the terminating node needs not to be created
00372           while(pidx<=idx) {
00373             //insert node
00374             simultprojnode_t *newnode = tempnodealloc.allocate();
00375             totnodecount++;
00376             //newnode->counter=0;
00377             newnode->parentitem=curritemset[pidx-1];//this is already 1-indexed
00378             //the parent is the last node inserted with that item id in this tree.
00379             newnode->parentoffset=parent->l1trees[childitem].itemstarts[curritemset[pidx-1]]-1; //already indexed from 1
00380             //add the new node to the counter
00381             parent->l1trees[childitem].itemstarts[curritemset[pidx]]++;
00382             //add the new node to the linked list
00383             //fprintf(stderr,"ins pidx %d into temptrees[%d].nodes[%d]",pidx,childitem,curritemset[pidx]);
00384             newnode->next=temptrees[childitem].nodes[curritemset[pidx]];
00385             temptrees[childitem].nodes[curritemset[pidx]]=newnode;
00386             //add the new node to the recursion node table
00387             recursenodes[pidx][childitem]=newnode;
00388             pidx++;
00389           }
00390 
00391 
00392           //alignalloc::zero(recursenodes[idx+1],maxitem); // **** would be enough to zero FROM current item till maxitem
00393           simultRecurse(childp);
00394 
00395           itemlists[idx].combine(itemlists[idx+1]);
00396           itemlists[idx].setItem(childitem,(*childp).getCounter());
00397           //fprintf(stderr,")");
00398           curritemset.pop_back();
00399         }
00400         //insert my node into all trees that are in mylist
00401         //fprintf(stderr,"\"");
00402         for (typename myitemlist_t::iterator i=itemlists[idx].begin();
00403              i!=itemlists[idx].end();
00404              ++i) {
00405           //the parent pointers, next, etc. are already filled by the descendant who has first inserted i->first into my itemlist
00406           recursenodes[idx][(*i).first]->counter=(*i).second;
00407           recursenodes[idx][(*i).first]=NULL;
00408           //fprintf(stderr,"{%d %d}",(*i).first,(*i).second);
00409         }
00410         //fprintf(stderr,"\"");
00411       }//function simultrecurse
00412 
00413     public:
00414       void doSimultProject(NonOrdFPStructure *par,item_t maxitem, uint32_t maxdepth, BUILDTREE &btr,typename BUILDTREE::buildtree_t  &tree) {
00415         log_status(0,"Preparing simultaneous projection");
00416         parent=par;
00417         totnodecount=0;
00418         parent->l1trees.resize(maxitem);
00419         this->maxitem=maxitem;
00420         curritemset.reserve(maxdepth+1);
00421         curritemset.resize(0);
00422         curritemset.push_back(0);//watch
00423         temptrees.resize(maxitem);
00424         itemlists.resize(maxdepth+1);
00425         recursenodes.resize(maxdepth+1);
00426         for(uint32_t i=0;i<=maxdepth;++i) {
00427           itemlists[i].init(maxitem);
00428           alignalloc::allocz(recursenodes[i],maxitem);
00429         }
00430         for (item_t i=0;i<maxitem;++i) {
00431           temptrees[i].nodes.resize(i+1,NULL);
00432           //this will hold the count of nodes for each item
00433           //this is indexed with 'item+1'
00434           alignalloc::allocz(parent->l1trees[i].itemstarts,i+1);
00435           parent->l1trees[i].itemstarts[0]=1;//we do have a node for item id -1, the root.
00436           recursenodes[0][i]=tempnodealloc.allocate();
00437           totnodecount++;
00438           recursenodes[0][i]->parentitem=0;//**** may cause problems
00439           recursenodes[0][i]->parentoffset=0;
00440           recursenodes[0][i]->next=NULL;
00441           recursenodes[0][i]->counter=0;
00442           temptrees[i].nodes[0]=recursenodes[0][i];
00443         }
00444         log_status(0,"Doing simultaneous projection");
00445         simultRecurse(tree.getRoot());
00446         //fprintf(stderr,"\n");
00447         log_info(0,"total node count %u",totnodecount);
00448         log_status(0,"Copying temporary tries into the final containers.");
00449         for(uint32_t i=0;i<=maxdepth;++i) {
00450           alignalloc::free(recursenodes[i]);
00451         }
00452         bracz::maxvector<index_t> nodeoffsets;
00453         nodeoffsets.reserve(maxitem+2);
00454         bracz::maxvector<index_t> nextnode;
00455         nextnode.reserve(maxitem+1);
00456         for (item_t curritem=0;curritem<maxitem;++curritem) {
00457           //fprintf(stderr,"%d ",curritem);
00458           fptree_t &t=parent->l1trees[curritem];
00459           nodeoffsets.resize(curritem+2);
00460           nextnode.resize(curritem+1);
00461           nodeoffsets[0]=0;
00462           for(item_t i=1;i<=curritem+1;++i) {
00463             nodeoffsets[i]=nodeoffsets[i-1]+t.itemstarts[i-1];
00464             nextnode[i-1]=t.itemstarts[i-1]=nodeoffsets[i];
00465           }
00466           //nextnode=nodeoffsets;
00467           size_t nodecount=t.itemstarts[curritem];
00468           alignalloc::alloc(t.parents,nodecount);
00469           alignalloc::alloc(t.counters,nodecount);
00470           alignalloc::alloc(t.freqs,curritem);
00471           for(item_t i=0;i<=curritem;++i) {
00472             counter_t allc=0;
00473             for (simultprojnode_t *cnode=temptrees[curritem].nodes[i];cnode;cnode=cnode->next) {
00474               __builtin_prefetch(cnode->next);
00475               index_t idx=--nextnode[i];
00476               //fprintf(stderr,"node %d, counter %d; ",idx,cnode->counter);
00477               t.parents[idx]=nodeoffsets[cnode->parentitem]+cnode->parentoffset;
00478               allc+=(t.counters[idx]=cnode->counter);
00479             }
00480             if (i) {
00481               t.freqs[i-1]=allc;
00482               //fprintf(stderr,"f[%d %d] ",i,allc);
00483             } else {
00484               //fprintf(stderr,"root(%d %d) ",allc,t.counters[0]);
00485             }
00486           }//for levels to copy
00487         }//for temp tree to copy
00488         log_status(0,"Finished. Starting main recursion.");
00489       }//doSimultProject
00490 
00491     };
00492 
00493     friend class SimultProject;
00494 
00495     void simultProject(item_t maxitem) {
00496       bracz::nullTreeAnn treeann;
00497       BUILDTREE btr;
00498       typename BUILDTREE::buildtree_t buildtree;
00499       btr.initBuildTree(buildtree,maxitem,treeann);
00500 
00501       log_status(0,"Building temporary FP tree.");
00502       //build the trie
00503       std::vector<item_t> trans;
00504       inp->rewind();
00505       //    size_t singlec=0;
00506       transaction_count=0;
00507       uint32_t maxdepth = 0;
00508       while (counter_t count=inp->nextTransactionBIS(trans)) {
00509         transaction_count+=count;
00510         if (trans.size()>maxdepth) 
00511           maxdepth=trans.size();
00512         btr.addTransToTree(trans,trans.size(),buildtree,count,treeann);
00513       }
00514       SimultProject projctr;
00515       projctr.doSimultProject(this,maxitem,maxdepth,btr,buildtree);
00516     }
00517 
00518   public:
00519 
00526     NonOrdFPStructure(INPUT *_inp, item_t maxitem) {
00527       inp=_inp;
00528       switch(FIRSTLEVEL) {
00529       case FLBuildSingleTree: {
00530         buildTree(maxitem);
00531         break;
00532       }
00533       case FLBuildAllL1Trees: {
00534         buildAllL1Trees(maxitem);
00535         break;
00536       }
00537       case FLSimultProject: {
00538         simultProject(maxitem);
00539         break;
00540       }
00541       }
00542     }
00543 
00544     fptree_t * getFullTree() {
00545       if (FIRSTLEVEL != FLBuildSingleTree) {
00546         log_err(0,"Requested full tree when per-item trees are calculated!");
00547         return NULL;
00548       } else 
00549         return &fulltree;
00550     }
00551 
00552     fptree_t * getProjTree(item_t item) {
00553       if (FIRSTLEVEL == FLBuildSingleTree) {
00554         log_err(0,"Requested per-item tree when only unconditional tree is calculated!");
00555         return NULL;
00556       } else 
00557         return &(l1trees[item]);
00558     }
00559 
00560 
00561     ~NonOrdFPStructure() {
00562       alignalloc::freeifnonnull(fulltree.parents);
00563       alignalloc::freeifnonnull(fulltree.itemstarts);
00564       alignalloc::freeifnonnull(fulltree.counters);
00565       alignalloc::freeifnonnull(fulltree.freqs);
00566     }
00567   
00568     bool DINLINE checkSinglePath(fptree_t *t, item_t curritem) {
00569       return false;
00570     }
00571 
00572     template <class O_M> void DINLINE handleSinglePath(fptree_t *t, item_t curritem, O_M *out) {
00573     
00574     }
00575 
00576   }; //class nonordfpstructure
00577 
00597   template<class INPUT,class BUILDTREEALLOC, bool SINGLE, bool TD,
00598   bool PROJECT, bool PROJECTDELETECLOSED, bool PROJMERGENODES,
00599   FirstLevel FIRSTLEVEL, bool SPARSEAGGR> class TDNonOrdFPStructure : public NonOrdFPStructure<INPUT,BUILDTREEALLOC,FIRSTLEVEL> {
00600   public:
00601     typedef TYPENAME NonOrdFPStructure<INPUT,BUILDTREEALLOC,FIRSTLEVEL>::fptree_t fptree_t;
00602     typedef TYPENAME NonOrdFPStructure<INPUT,BUILDTREEALLOC,FIRSTLEVEL>::index_t index_t;
00603 
00604   private:
00605     counter_t minsupp;
00606 
00607     //these will be unused if single path optimization is turned off
00608     counter_t *multiples;
00609     index_t *last;
00610     //these will be unused if projections are turned off
00612     index_t *projmap;
00614     index_t *projlastidx;
00615     //these will be used only in sparse aggregation
00617     index_t *sparsenodes;
00619     index_t **nextfreesparsenode;
00620 
00621     //these will be unused if operating TD mode
00623     blockstack<stacksingleblock<counter_t,true> > nodealloc;
00625     blockstack<stacksingleblock<counter_t,false> > headeralloc;
00627     blockstack<stacksingleblock<index_t,false> > projheaderalloc;
00629     blockstack<stacksingleblock<index_t,false> > projnodealloc;
00631     singlesalloc<fptree_t,100> treealloc;
00632 
00633     using NonOrdFPStructure<INPUT,BUILDTREEALLOC,FIRSTLEVEL>::l1trees;
00634 
00635   public:
00636 
00637 
00638     TDNonOrdFPStructure(INPUT *_inp, item_t maxitem, counter_t _minsupp) :      NonOrdFPStructure<INPUT,BUILDTREEALLOC,FIRSTLEVEL>(_inp,maxitem),minsupp(_minsupp) {
00639       if (SINGLE) {
00640         alignalloc::alloc(multiples,maxitem);
00641         alignalloc::alloc(last,maxitem);
00642       }
00643       index_t maxtreesize=0;
00644       if (PROJECT||SPARSEAGGR) {
00645         if (FIRSTLEVEL!=FLBuildSingleTree) {
00646           for (item_t i=0;i<maxitem;++i) { 
00647             if (l1trees[i].itemstarts[i]>maxtreesize) {
00648               maxtreesize=l1trees[i].itemstarts[i];
00649             }
00650           }
00651         } else {
00652           maxtreesize=this->fulltree.itemstarts[maxitem];
00653         }
00654       }
00655       if (PROJECT) {
00656         alignalloc::alloc(projmap,maxtreesize);
00657         alignalloc::alloc(projlastidx,maxtreesize);
00658       }
00659       if (SPARSEAGGR) {
00660         alignalloc::allocz(sparsenodes,maxtreesize);
00661         alignalloc::alloc(nextfreesparsenode,maxitem);
00662       }
00663     }
00664 
00665     ~TDNonOrdFPStructure() {
00666       if (SINGLE) {
00667         alignalloc::free(multiples);
00668         alignalloc::free(last);
00669       }
00670     }
00671 
00672     class SinglePathIterator {
00673     private:
00674       index_t nodeidx;
00675       item_t curritem;
00676     public:
00677       SinglePathIterator () {}
00678       SinglePathIterator(index_t _n, item_t _i):
00679         nodeidx(_n), curritem(_i) {}
00680 
00681       void DINLINE increment (fptree_t *tree) {
00682         nodeidx=tree->parents[nodeidx];
00683         while (nodeidx && (tree->itemstarts[curritem]>nodeidx))
00684           curritem--;
00685       }
00686 
00687       bool DINLINE atEnd(fptree_t *tree) {
00688         return (!nodeidx);
00689       }
00690 
00691       item_t DINLINE getCurrItem(fptree_t *tree) {
00692         return curritem;
00693       }
00694 
00695     };
00696 
00698 
00703     item_t DINLINE checkSinglePath(fptree_t *t, item_t curritem, item_t spdepth) {
00704       if (!SINGLE) {
00705         return 0;
00706       } else {
00707         while ((spdepth<curritem) && (multiples[spdepth]<=1))
00708           spdepth++;
00709         return spdepth;
00710       }
00711     }
00712 
00713     SinglePathIterator DINLINE getSinglePathIterator(fptree_t *tree, item_t curritem) {
00714       if (!SINGLE) {
00715         log_err(0,"Error: requested SinglePathIterator when singlepath optimization is not enabled");
00716       }
00717       return SinglePathIterator(last[curritem],curritem);
00718     }
00719 
00720 
00721     void DINLINE zeroDataDense(fptree_t *intr, item_t curritem) {
00722       //partial erase on the `data' part of the tree
00723       alignalloc::zerola(intr->counters,intr->itemstarts[curritem]);
00724       alignalloc::zerola(intr->freqs,curritem);
00725       if (SINGLE)
00726         alignalloc::zero(multiples,curritem+1);
00727     }
00728     
00729     void DINLINE zeroDataADense(fptree_t *intr, item_t curritem) {
00730       //aligned erase on the `data' part of the tree
00731       if (!SPARSEAGGR) 
00732         alignalloc::zero(intr->counters,intr->itemstarts[curritem]);
00733       alignalloc::zero(intr->freqs,curritem);
00734       if (SINGLE)
00735         alignalloc::zero(multiples,curritem+1);
00736     }
00737 
00738     void DINLINE aggregateSparse(index_t* itemstarts,index_t *parents, counter_t *freqs, counter_t *ocounters, counter_t *ncounters, item_t curritem) {
00739       for (item_t i=0;i<curritem;++i) {
00740         nextfreesparsenode[i]=sparsenodes+itemstarts[i];
00741       }
00742       item_t paritem=curritem;
00743       for(index_t nodeidx=itemstarts[curritem];nodeidx<itemstarts[curritem+1];++nodeidx) {
00744         if (ocounters[nodeidx]) {
00745           register index_t par=parents[nodeidx];
00746           if (par && !ncounters[parents[nodeidx]]) {
00747             if (itemstarts[paritem]>par) {
00748               do {
00749                 --paritem;
00750               } while (itemstarts[paritem]>par);
00751             } else {
00752               while (itemstarts[paritem+1]<=par) ++paritem;
00753             }
00754             *(nextfreesparsenode[paritem]++)=par;
00755           }
00756           ncounters[parents[nodeidx]]+=ocounters[nodeidx];
00757         }
00758       }
00759       for (item_t nextitem=curritem;nextitem>0;) {
00760         --nextitem;//pre-decrement: workaround of unsigned int item_t and underflow
00761         if (SINGLE) {
00762           multiples[nextitem]=nextfreesparsenode[nextitem]-(sparsenodes+itemstarts[nextitem]);
00763           last[nextitem]=*(nextfreesparsenode[nextitem]-1);
00764         }
00765         for(index_t *pnodeidx=sparsenodes+itemstarts[nextitem];pnodeidx!=nextfreesparsenode[nextitem];++pnodeidx) {
00766           index_t nodeidx=*pnodeidx;
00767           counter_t cnt=ncounters[nodeidx];
00768           index_t par=parents[nodeidx];
00769           freqs[nextitem]+=cnt;
00770           if (par && !ncounters[par]) {
00771             if (itemstarts[paritem]>par) {
00772               do {
00773                 --paritem;
00774               } while (itemstarts[paritem]>par);
00775             } else {
00776               while (itemstarts[paritem+1]<=par) ++paritem;
00777             }
00778             *(nextfreesparsenode[paritem]++)=par;
00779           }
00780           ncounters[par]+=cnt;
00781         }//for nodes of current item
00782       }//for next item
00783     } //aggregateSparse
00784     
00785     void DINLINE aggregateDense(index_t* itemstarts,index_t *parents, counter_t *freqs, counter_t *ocounters, counter_t *ncounters, item_t curritem) {
00786 #ifdef PREFETCH      
00787       for(index_t nodeidx=itemstarts[curritem+1];nodeidx>itemstarts[curritem];) {
00788         --nodeidx;
00789         if (nodeidx>PREFETCHDIST)
00790           __builtin_prefetch(ncounters+parents[nodeidx-PREFETCHDIST]);
00791         ncounters[parents[nodeidx]]+=ocounters[nodeidx];  
00792       }
00793 #else
00794       for(index_t nodeidx=itemstarts[curritem];nodeidx<itemstarts[curritem+1];++nodeidx) {
00795         ncounters[parents[nodeidx]]+=ocounters[nodeidx];  
00796       }
00797 #endif
00798       for (item_t nextitem=curritem;nextitem>0;) {
00799         --nextitem;//pre-decrement: workaround of unsigned int item_t and underflow
00800 #ifdef PREFETCH
00801         for(index_t nodeidx=itemstarts[nextitem+1]-1;nodeidx>=itemstarts[nextitem];--nodeidx) {
00802           if (nodeidx>PREFETCHDIST)
00803             __builtin_prefetch(ncounters+parents[nodeidx-PREFETCHDIST]); 
00804 #else
00805         for(index_t nodeidx=itemstarts[nextitem];nodeidx<itemstarts[nextitem+1];++nodeidx) {
00806 #endif
00807           register counter_t cnt=ncounters[nodeidx];
00808           if (cnt) {
00809             {
00810               register int par=parents[nodeidx];
00811               ncounters[par]+=cnt;
00812             }
00813             freqs[nextitem]+=cnt;
00814             if (SINGLE) {
00815               multiples[nextitem]++;
00816               last[nextitem]=nodeidx;
00817             }
00818           }//if current node nonzero
00819         }//for nodes of current item
00820       }//for next item
00821     } //aggregateDense
00822 
00823     fptree_t * DINLINE projectTree(fptree_t *intr,item_t curritem) {
00824       if (TD) {
00825         zeroDataDense(intr,curritem);
00826         aggregateDense(intr->itemstarts,intr->parents,intr->freqs,intr->counters,intr->counters,curritem);
00827         return intr;
00828       } else {
00829         fptree_t *nftree = treealloc.allocate();
00830         nftree->parents=intr->parents;
00831         nftree->itemstarts=intr->itemstarts;
00832         nftree->counters=NULL;
00833         nodealloc.pushAndGetBlock(nftree->itemstarts[curritem],nftree->counters);
00834         nftree->freqs=NULL;
00835         headeralloc.pushAndGetBlock(curritem,nftree->freqs);
00836         zeroDataADense(nftree,curritem);
00837         if (SPARSEAGGR) {
00838           aggregateSparse(nftree->itemstarts,nftree->parents,nftree->freqs,intr->counters,nftree->counters,curritem);
00839 
00840         } else {
00841           aggregateDense(nftree->itemstarts,nftree->parents,nftree->freqs,intr->counters,nftree->counters,curritem);
00842         }
00843         if (PROJECT) {
00844          if (SPARSEAGGR) 
00845          compactTreeSparse(nftree,curritem);
00846          else
00847          compactTree(nftree,curritem);
00848 
00849         }
00850         return nftree;
00851       }
00852     }
00853 
00854     inline bool DINLINE dropNode(fptree_t *t, item_t curritem, index_t nodeidx) {
00855       return (t->counters[nodeidx]==0) || (t->freqs[curritem]<minsupp) || (PROJECTDELETECLOSED&&(t->freqs[curritem]==t->getRootFrequency()));
00856     }
00857 
00858     //do not call this routine in TD mode -- it (destructively) modifies the fptree_t *t.
00859     inline void DINLINE compactTree(fptree_t *t, item_t projitem) {
00860       index_t nn=1; //next free node index
00861       index_t *newparents;
00862       index_t *newitemstarts;
00863       projheaderalloc.pushAndGetBlock(projitem+1,newitemstarts);
00864       projnodealloc.pushAndGetBlock(t->itemstarts[projitem],newparents);
00865       projmap[0]=0;
00866       newparents[0]=0;
00867       projlastidx[0]=0;
00868       for (item_t curritem=0; curritem<projitem; ++curritem) {
00869         newitemstarts[curritem]=nn;
00870         for (index_t nodeidx=t->itemstarts[curritem];nodeidx<t->itemstarts[curritem+1];++nodeidx) {
00871           if (dropNode(t,curritem,nodeidx)) {
00872             projmap[nodeidx]=projmap[t->parents[nodeidx]];
00873           } else {
00874             index_t newidx;
00875             if (PROJMERGENODES && 
00876                 ((newidx=projlastidx[projmap[t->parents[nodeidx]]])
00877                  >=newitemstarts[curritem])) {
00878               //if there is a child of my (new) parent on this level (=with this item as the merge
00879               t->counters[newidx]+=t->counters[nodeidx];
00880             } else {
00881               //allocate a new node
00882               newidx=nn++;
00883               t->counters[newidx]=t->counters[nodeidx]; //newidx<=nodeidx, we won't overwrite any position we'd want to use later
00884               newparents[newidx]=projmap[t->parents[nodeidx]];
00885               projlastidx[newparents[newidx]]=newidx;
00886               projlastidx[newidx]=0;
00887             }
00888             projmap[nodeidx]=newidx;
00889           }//if drop-or-keep node
00890         }//for nodes at the current level
00891         if (SINGLE) {
00892           multiples[curritem]=nn-newitemstarts[curritem];
00893           last[curritem]=newitemstarts[curritem];
00894         }
00895       }//for items
00896       newitemstarts[projitem]=nn;
00897       t->parents=newparents;
00898       t->itemstarts=newitemstarts;
00899       t->freeelements|=fptree_t::free_structure;
00900     }//compactTree
00901 
00902 
00903     inline void DINLINE compactTreeSparse(fptree_t *t, item_t projitem) {
00904       index_t nn=1; //next free node index
00905       index_t *newparents;
00906       index_t *newitemstarts;
00907       projheaderalloc.pushAndGetBlock(projitem+1,newitemstarts);
00908       projnodealloc.pushAndGetBlock(t->itemstarts[projitem],newparents);
00909       counter_t *newcounters;
00910       projnodealloc.pushAndGetBlock(t->itemstarts[projitem],newcounters);
00911       projmap[0]=0;
00912       newparents[0]=0;
00913       projlastidx[0]=0;
00914       newcounters[0]=t->counters[0];
00915       for (item_t curritem=0; curritem<projitem; ++curritem) {
00916         newitemstarts[curritem]=nn;
00917         for(index_t *pnodeidx=sparsenodes+t->itemstarts[curritem];pnodeidx!=nextfreesparsenode[curritem];++pnodeidx) {
00918           index_t nodeidx=*pnodeidx;
00919           //        for (index_t nodeidx=t->itemstarts[curritem];nodeidx<t->itemstarts[curritem+1];++nodeidx) {
00920           if (dropNode(t,curritem,nodeidx)) {
00921             projmap[nodeidx]=projmap[t->parents[nodeidx]];
00922           } else {
00923             index_t newidx;
00924             if (PROJMERGENODES && 
00925                 ((newidx=projlastidx[projmap[t->parents[nodeidx]]])
00926                  >=newitemstarts[curritem])) {
00927               //if there is a child of my (new) parent on this level (=with this item as the merge
00928               newcounters[newidx]+=t->counters[nodeidx];
00929             } else {
00930               //allocate a new node
00931               newidx=nn++;
00932               newcounters[newidx]=t->counters[nodeidx]; //newidx<=nodeidx, we won't overwrite any position we'd want to use later
00933               newparents[newidx]=projmap[t->parents[nodeidx]];
00934               projlastidx[newparents[newidx]]=newidx;
00935               projlastidx[newidx]=0;
00936             }
00937             projmap[nodeidx]=newidx;
00938           }//if drop-or-keep node
00939           t->counters[nodeidx]=0;
00940         }//for nodes at the current level
00941         if (SINGLE) {
00942           multiples[curritem]=nn-newitemstarts[curritem];
00943           last[curritem]=newitemstarts[curritem];
00944         }
00945       }//for items
00946       newitemstarts[projitem]=nn;
00947       t->parents=newparents;
00948       t->itemstarts=newitemstarts;
00949       t->freeelements|=fptree_t::free_structure;
00950       t->counters[0]=0;
00951       t->counters=newcounters;
00952       nodealloc.freeBlock();
00953     }//compactTree
00954 
00955     void DINLINE deallocTree(fptree_t *t, fptree_t *parent, item_t projitem) {
00956       //in case of TD nothing to do as there is only one tree which is global
00957       if (!TD) {
00958         if (PROJECT) {
00959           projheaderalloc.freeBlock();
00960           projnodealloc.freeBlock();
00961           if (SPARSEAGGR) {//allocated two blocks on projnodealloc
00962             projnodealloc.freeBlock();
00963           }
00964         }
00965         if (!PROJECT || !SPARSEAGGR) {
00966           nodealloc.freeBlock();
00967         }
00968         headeralloc.freeBlock();
00969         treealloc.free(t);
00970       }
00971     }
00972   }; //class TDNonOrdFPStructure
00973 
00974 
00975 } //namespace bracz

Generated on Sun Sep 17 17:50:39 2006 for FIM environment by  doxygen 1.4.4