Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

core.cpp

Go to the documentation of this file.
00001 
00009 #include "common.hpp"
00010 
00011 #include <stack>
00012 #include <vector>
00013 
00014 #include "common/log.h"
00015 #include "common/maxvector.cpp"
00016 
00017 namespace bracz {
00018 
00034   template<class STRUCTURE, class OUTPUT>
00035   class FPGrowthBase {
00036   protected:
00037   
00038     typedef TYPENAME STRUCTURE::fptree_t fptree_t;
00039 
00041     typedef struct {
00043       item_t nextitem;
00045       item_t lastitem;
00047       fptree_t *fptree;
00049       item_t singlepdepth;
00050     } tdrecurse_t;
00051 
00052   public:
00064     void findFrequentItems(const counter_t minsupp, STRUCTURE &str, OUTPUT &out, item_t maxitem) {}
00065 
00066   };
00067 
00068 
00069   template<class STRUCTURE, class OUTPUT, bool SINGLEPATH, NEELevel NEE, FirstLevel FIRSTLEVEL> class FPGrowthBaseNoNEE : public FPGrowthBase<STRUCTURE,OUTPUT> {
00070   protected:
00071     typedef TYPENAME FPGrowthBase<STRUCTURE,OUTPUT>::tdrecurse_t tdrecurse_t;
00072   public:
00073 #define VECTOR bracz::maxvector
00074 
00075     FPGrowthBaseNoNEE(){}
00076 
00077     //global variables for the core recursion
00078     VECTOR<item_t> curritemset;
00079     VECTOR<item_t> neelist;
00080     VECTOR<counter_t> neecounts;
00081     //    VECTOR<TYPENAME STRUCTURE::SinglePathIterator> itst;
00082 
00083     //num freq itemset
00084     size_t numfreq;
00085     //count of freqitems in main recursion
00086     size_t numfreqrec;
00087     //statistics for single path optimization
00088     size_t singleheader;
00089     size_t singlepart;
00090     size_t singlebody;
00091 
00092     counter_t minsupp;
00093 
00094     item_t maxitem;
00095 
00096 
00097     OUTPUT *out;
00098 
00099     STRUCTURE *str;
00100 
00102     TYPENAME STRUCTURE::fptree_t *singleptree;
00104     counter_t cfreq;
00105 
00106   private:
00107     inline void DINLINE outputPush(item_t item) {
00108       if (OUTPUT::isDFO()) {
00109         out->pushItem(item);
00110       } else {
00111         curritemset.push_back(item);
00112       }
00113     }
00114 
00115     inline void DINLINE outputPop() {
00116       if (OUTPUT::isDFO()) {
00117         out->popItem();
00118       } else {
00119         curritemset.pop_back();
00120       }
00121     }
00122 
00123     inline void DINLINE outputWriteLow(counter_t supp) {
00124       numfreq++;
00125       if (OUTPUT::isDFO()) {
00126         out->write(supp);
00127       } else {
00128         out->writeItemsetAndCounter(curritemset.begin(),curritemset.end(),supp);
00129       }
00130     }
00131 
00133     counter_t neesupp;
00134 
00135     void neeRecurse(size_t neeidx) {
00136       //choose the first element to insert
00137       while(neeidx<neelist.size()) {
00138         if (OUTPUT::isDFO()) {
00139           numfreq++;
00140           out->pushItemWithPrevSupport(neelist[neeidx]);
00141         } else {
00142           outputPush(neelist[neeidx]);
00143           outputWriteLow(neesupp);
00144         }
00145         neeRecurse(++neeidx);
00146         outputPop();
00147       }
00148     }
00149 
00150 
00151     inline void DINLINE outputWrite(counter_t supp) {
00152       outputWriteLow(supp);
00153       if (NEE==NEE_Full) {
00154         neesupp=supp;
00155 #ifdef STATS_NEECOUNT
00156         neecounts[neelist.size()]++;
00157 #endif
00158         neeRecurse(0);
00159       }
00160     }
00161 
00162 
00163     inline void DINLINE outputPushAndWrite(item_t item, counter_t supp) {
00164       outputPush(item);
00165       outputWrite(supp);
00166     }
00167 
00168   public:
00169 
00170     void singlePRecurse(TYPENAME STRUCTURE::SinglePathIterator itop) {
00171       while(itop.increment(singleptree),!itop.atEnd(singleptree)) {
00172         outputPushAndWrite(itop.getCurrItem(singleptree),cfreq);
00173         // ****
00174         singlebody++;
00175         singlePRecurse(itop);
00176       }
00177       outputPop();
00178     }
00179 
00180     void freqRecurse(tdrecurse_t stop) {
00181       counter_t rootfreq=stop.fptree->getRootFrequency();
00182       if (NEE==NEE_Full) {
00183         for (item_t curritem=stop.lastitem;curritem>(SINGLEPATH?stop.singlepdepth:0);) {
00184           --curritem;
00185           if (stop.fptree->getFrequency(curritem)==rootfreq) {
00186             //fprintf(stderr,"\nnee item %d singlepdepth %d lastitem %d | ",curritem,stop.singlepdepth,stop.lastitem);
00187             neelist.push_back(curritem);
00188           } //if conditionally closed item
00189         }//for available items
00190       }//if NEE
00191       outputWrite(rootfreq);
00192       for (item_t curritem=0;curritem<stop.lastitem;++curritem) {
00193         if (stop.lastitem==maxitem) {
00194           fprintf(stderr,"%d ",curritem);
00195         }
00196         counter_t cfreq=stop.fptree->getFrequency(curritem);
00197         if (cfreq < minsupp) {
00198           continue;
00199         } else if (SINGLEPATH && curritem < stop.singlepdepth) {
00200           singlepart++;
00201           outputPushAndWrite(curritem,cfreq);
00202           //out->pushItem(maxitem-1);//outputPushAndWrite(0,cfreq);
00203           this->cfreq=cfreq;
00204           this->singleptree=stop.fptree;
00205           singlePRecurse(str->getSinglePathIterator(stop.fptree,curritem));
00206           //out->popItem();
00207           continue;
00208         } else if ((NEE==NEE_Full) && (cfreq==rootfreq)) {
00209           //          fprintf(stderr,"\nnee popback %d singlepdepth %d cfreq %d root freq %d |",curritem,stop.singlepdepth,cfreq,rootfreq);
00210           neelist.pop_back();
00211           continue;
00212         }
00213         numfreqrec++;
00214         outputPush(curritem);
00215         tdrecurse_t nr;
00216         nr.nextitem=0;
00217         nr.lastitem=curritem;
00218         if ((NEE==NEE_Level1) && (cfreq==rootfreq)) {
00219           nr.fptree=stop.fptree;
00220           nr.singlepdepth=stop.singlepdepth;
00221           freqRecurse(nr);
00222         } else {
00223           nr.fptree=str->projectTree(stop.fptree,curritem);
00224           // **** remove this sanity check
00225           if (cfreq!=nr.fptree->getRootFrequency()) {
00226             log_err(0,"Inconsistency: cfreq %u cond tree freq %u",cfreq,nr.fptree->getRootFrequency());
00227           }
00228           nr.singlepdepth=str->checkSinglePath(nr.fptree,curritem,stop.singlepdepth);
00229           freqRecurse(nr);
00230           str->deallocTree(nr.fptree, stop.fptree, curritem);
00231         }
00232         outputPop();
00233       }
00234     }
00235 
00247     void NOINLINE findFrequentItems(const counter_t minsupp, STRUCTURE &str, OUTPUT &out, item_t maxitem) {
00248       curritemset.reserve(maxitem);
00249       neelist.reserve(maxitem);
00250 #ifdef STATS_NEECOUNT
00251       neecounts.reserve(maxitem);
00252       for (item_t i=0;i<maxitem;++i)
00253         neecounts[i]=0;
00254 #endif
00255       this->minsupp=minsupp;
00256       this->out=&out;
00257       this->str=&str;
00258       //num freq itemset
00259       numfreq=0;
00260       numfreqrec=0;
00261       //statistics for single path optimization
00262       singleheader=0;
00263       singlepart=0;
00264       singlebody=0;
00265       this->maxitem=maxitem;
00266       tdrecurse_t globtree;
00267 
00268       if (FIRSTLEVEL!=FLBuildSingleTree) {
00269         out.write(str.getTransactionCount());
00270         for (item_t curritem=0;curritem<maxitem;++curritem) {
00271           fprintf(stderr,"%d ",curritem);
00272           globtree.nextitem=0;
00273           globtree.singlepdepth=0;
00274           globtree.lastitem=curritem;
00275           globtree.fptree=str.getProjTree(curritem);
00276           outputPush(curritem);
00277           freqRecurse(globtree);
00278           outputPop();
00279         }
00280       } else {
00281         globtree.nextitem=0;
00282         globtree.singlepdepth=0;
00283         globtree.lastitem=maxitem;
00284         globtree.fptree=str.getFullTree();
00285         freqRecurse(globtree);
00286       }
00287       fprintf(stderr,"\n");
00288       log_info(0,"core recursion ended. numfreq rec=%zu, single header=%zu, part=%zu, body=%zu",numfreqrec,singleheader,singlepart,singlebody);
00289       log_info(0,"Total frequent itemsets returned: %zu",numfreq);
00290     }
00291     
00292     ~FPGrowthBaseNoNEE() {
00293 #ifdef STATS_NEECOUNT
00294       for (item_t i=0;i<maxitem;++i) {
00295         fprintf(stderr,"length %d, count %u\n",i,neecounts[i]);
00296       }
00297 #endif
00298     }
00299 
00300   };//class FPGrowthBaseNoNEE
00301 
00302 /*
00303 
00304   stack: last item, structure, counter,
00305   for next item=last item -1 downto 0 do
00306     project
00307     recurse (next item, new structure, new counter)
00308     cleanup
00309   end for
00310 
00311 
00312   stack: last item, next item, structure, counter
00313   while(!stack.empty) do
00314     if (stack.top.nextitem == stack.top.lastitem) then
00315       pop stack
00316       stack.top.nextitem++;
00317     else
00318       if stack.top.nextitem is not frequent then continue
00319       itemset.push_back(nextitem)
00320       output the (single) new itemset:
00321       project onto stack.top.nextitem
00322       check-singlepath
00323       get-conditional-frequencies
00324       check-and-extend conditional closed item list
00325       push projection onto stack with nextitem=0
00326     endif
00327 
00328   
00329 */
00330 
00331 
00332      
00333 } //namespace bracz                                                                                                

Generated on Sun Sep 17 17:50:38 2006 for FIM environment by  doxygen 1.4.4