00001 
00006 #ifndef __ECLAT_HPP_
00007 #define __ECLAT_HPP_ t
00008 
00009 
00010 
00011 
00012 #ifdef ECLAT_DEFECT
00013 static const bool EXTENSION_DESCRIPTION_DEFECTS = true; 
00014 static const bool OMIT_FINAL_INCIDENCE_MATRIX = true;
00015 static const bool PRUNE_EQUISUPPORT_EXTENSIONS = true;
00016 static const bool EARLY_FILTERING = true;
00017 
00018 static const bool INTERSECTION_ROWSWAPPING = false; 
00019 #endif
00020 
00021 #ifdef ECLAT_COVER
00022 static const bool EXTENSION_DESCRIPTION_DEFECTS = false; 
00023 static const bool OMIT_FINAL_INCIDENCE_MATRIX = true;
00024 static const bool PRUNE_EQUISUPPORT_EXTENSIONS = true;
00025 static const bool EARLY_FILTERING = true;
00026 
00027 static const bool INTERSECTION_ROWSWAPPING = false; 
00028 #endif
00029 
00030 
00031 
00032 #include "SparseBitmatrix_create.hpp"
00033 #include "SparseBitmatrix_setdifference.hpp"
00034 #include "SparseBitmatrix_intersection.hpp"
00035 
00036 
00037 #include "common/debug.hpp"
00038 #include "io/input/transaction_reader/TransactionReader.hpp"
00039 #include "common.hpp"
00040 
00041 #include <vector>
00042 
00043 
00044 static const bool OUTPUT_HINTS = false;
00045 
00046 
00047 
00048 template <class TRANSACTION_READER, class PATTERN_WRITER>
00049 class Eclat 
00050 {
00051 public:
00052   Eclat(TRANSACTION_READER& tdb, CounterItemPairs& freqItems, int minSup, int numTransactions, PATTERN_WRITER& out) :
00053     tdb(tdb), freqItems(freqItems), minSup(minSup), numTransactions(numTransactions), out(out)
00054   {
00055     MAX_PATTERN_LENGTH = 1000; 
00056     ptis = new (SparseBitmatrix*)[MAX_PATTERN_LENGTH];
00057     eeIterator = new typename PATTERN_WRITER::EquisupportItemOutputIterator(out);
00058   }
00059   ~Eclat() {};
00060 
00061   void findFrequentPatterns();
00062   
00063 private:
00064   TRANSACTION_READER& tdb;
00065   CounterItemPairs& freqItems;
00066   int minSup;
00067   int numTransactions;
00068   PATTERN_WRITER& out;
00069 
00073   typename PATTERN_WRITER::EquisupportItemOutputIterator* eeIterator;
00074 
00075   
00076   int MAX_PATTERN_LENGTH;
00077 
00081   int depth;
00082 
00086   int maxdefect;
00087 
00091   SparseBitmatrix** ptis;
00092 
00096   void checkUpToTwoRows(SparseBitmatrix* iti, counter_t minSup, counter_t maxdefect);
00097 
00101   void depthFirstWalk();
00102 
00103 
00104 };
00105 
00106 
00107 template <class TRANSACTION_READER, class PATTERN_WRITER>
00108 void Eclat<TRANSACTION_READER, PATTERN_WRITER>::findFrequentPatterns() {
00109   
00110   if (numTransactions >= minSup)
00111     out.write(numTransactions);
00112   
00113   
00114   std::cout << "build item/transaction incidence matrix..." << std::flush;
00115   SparseBitmatrix* iti = createSparseItemTransactionIncidenceMatrix(tdb, freqItems);
00116   int numItems = freqItems.size();
00117   std::cout << " - done" << std::endl;
00118 
00119 #if DEBUG_LEVEL >= 20  
00120   std::cout << "iti:" << std::endl;
00121   iti->dump();
00122 #endif
00123 
00124 #if DEBUG_LEVEL >= 20  
00125   std::cout << "item freqs: ";
00126   for(CounterItemPairs::iterator it_freqItems = freqItems.begin(); it_freqItems != freqItems.end(); ++it_freqItems )
00127     std::cout << it_freqItems->first << " "; 
00128   std::cout << endl;
00129 #endif
00130 
00131   
00132   if (! EXTENSION_DESCRIPTION_DEFECTS) { 
00133     std::cout << "compute covers..." << std::flush;
00134     for (int i = 0; i < MAX_PATTERN_LENGTH; i++)
00135       ptis[i] = NULL;
00136     ptis[0] = iti;
00137     depth = 0;
00138     depthFirstWalk();
00139     std::cout << " - done" << std::endl;
00140     return;
00141   }
00142 
00143   
00144   int rowCapacityIti2 = iti->computeMaxRowLength();
00145   if (EARLY_FILTERING) 
00146     rowCapacityIti2 -= minSup;
00147   SparseBitmatrix* iti2 = new SparseBitmatrix(numItems-1, rowCapacityIti2); 
00148   depth = 1;
00149   
00150   
00151   ptis[0] = iti;
00152   ptis[1] = iti2;
00153 
00154   
00155   std::cout << "compute defects..." << std::flush;
00156   
00157   
00158   for (int i = 0; i < numItems-1; i++) {
00159     
00160     assert(i >= 0 && i < iti->nRows());
00161     out.pushItem(iti->rowLabel(i)); 
00162     int itiRowLengthi = iti->rowLength(i); 
00163     
00164     maxdefect = itiRowLengthi - minSup;
00165     int numPairs = setdifferenceOfSparseBitmatrix<EARLY_FILTERING,PRUNE_EQUISUPPORT_EXTENSIONS>(*iti, i, *iti2, maxdefect, *eeIterator);
00166 
00167     if (numPairs > 0)
00168       depthFirstWalk();
00169     
00170     out.write(itiRowLengthi);
00171     out.popItem();
00172   }
00173   
00174   out.pushItem(iti->rowLabel(numItems-1)); 
00175   out.write(iti->rowLength(numItems-1)); 
00176   std::cout << " - done" << std::endl;
00177 }
00178 
00179 
00180 
00181 template <class TRANSACTION_READER, class PATTERN_WRITER>
00182 void Eclat<TRANSACTION_READER, PATTERN_WRITER>::checkUpToTwoRows(SparseBitmatrix* iti, 
00183                                                                  counter_t minSup, counter_t maxdefect) {
00184   assert(iti->nRows() <= 2);
00185 
00186   
00187   out.pushItem(iti->rowLabel(0));
00188   if (OUTPUT_HINTS) out.hint(string("a"));
00189   out.write(EXTENSION_DESCRIPTION_DEFECTS  
00190             ? minSup+maxdefect-iti->rowLength(0)
00191             : iti->rowLength(0)); 
00192   out.popItem();
00193   
00194   if (iti->nRows() == 2) {
00195     
00196     out.pushItem(iti->rowLabel(1)); 
00197     if (OUTPUT_HINTS) out.hint(string("b"));
00198     out.write(EXTENSION_DESCRIPTION_DEFECTS  
00199               ? minSup+maxdefect-iti->rowLength(1)
00200               : iti->rowLength(1)); 
00201     out.popItem();
00202 
00203     
00204     int maxdef0 = maxdefect - iti->rowLength(0);
00205     int rowlen01 = EXTENSION_DESCRIPTION_DEFECTS
00206       ? cardinalitySetdifferenceBySparseBitmatrix_TwoRows<EARLY_FILTERING>(*iti, maxdef0)
00207       : cardinalityIntersectionSparseBitmatrix_TwoRows<EARLY_FILTERING>(*iti, minSup);
00208     if ((EXTENSION_DESCRIPTION_DEFECTS 
00209          ? rowlen01 <= maxdef0
00210          : rowlen01 >= minSup)) {
00211       
00212       out.pushItem(iti->rowLabel(0));
00213       out.pushItem(iti->rowLabel(1)); 
00214       if (OUTPUT_HINTS) out.hint(string("c"));
00215       out.write(EXTENSION_DESCRIPTION_DEFECTS  
00216                 ? minSup+maxdef0-rowlen01
00217                 : rowlen01);
00218       out.popItem();
00219       out.popItem();
00220     }
00221   }
00222 }
00223 
00224 template <class TRANSACTION_READER, class PATTERN_WRITER>
00225 void Eclat<TRANSACTION_READER, PATTERN_WRITER>::depthFirstWalk() {
00226 #if DEBUG_LEVEL >= 10
00227   cout << endl
00228        << out.getCurrentPatternAsString() << " "
00229        << " (maxdefect=" << maxdefect << ", depth = " << depth <<", nrows = " << ptis[depth]->nRows() << "):" << endl;
00230 #endif
00231 #if DEBUG_LEVEL >= 30
00232   ptis[depth]->dump();
00233 #endif
00234 
00235   
00236   assert(ptis[depth] != NULL);
00237   SparseBitmatrix* pti = ptis[depth];
00238   
00239   SparseBitmatrix* bmOut;
00240   if (OMIT_FINAL_INCIDENCE_MATRIX || pti->nRows() >= 2) { 
00241     if (EXTENSION_DESCRIPTION_DEFECTS) {
00242       int rowCapacity = pti->maxRowLength(); 
00243       if (EARLY_FILTERING && maxdefect < rowCapacity)
00244         rowCapacity = maxdefect; 
00245       bmOut = new SparseBitmatrix(pti->nRows()-1, rowCapacity);
00246     } else { 
00247       bmOut = ptis[depth+1];
00248       if (bmOut == NULL)
00249         bmOut = new SparseBitmatrix(ptis[0]->nRows()-depth-1, &(ptis[0]->capacities()[depth+1]));
00250       
00251     
00252     }
00253     ptis[depth+1] = bmOut;
00254   } else
00255     bmOut = NULL;
00256 
00257   int numSiblings = pti->nRows();
00258   
00259   
00260   depth++;
00261   for (int i = 0; i < numSiblings-1; i++) {
00262     assert(i < pti->capacity());
00263     
00264     out.pushItem(pti->rowLabel(i));
00265     
00266     
00267     int numSiblingsNext; 
00268     if (EXTENSION_DESCRIPTION_DEFECTS) {
00269       numSiblingsNext = setdifferenceBySparseBitmatrix<EARLY_FILTERING,PRUNE_EQUISUPPORT_EXTENSIONS>(*pti, i, *bmOut, maxdefect-pti->rowLength(i), *eeIterator);
00270       if (numSiblingsNext > 0) {
00271         maxdefect -= pti->rowLength(i);
00272         if (OMIT_FINAL_INCIDENCE_MATRIX && numSiblingsNext <= 2)
00273           checkUpToTwoRows(bmOut, minSup, maxdefect);
00274         else
00275           depthFirstWalk(); 
00276         maxdefect += pti->rowLength(i);
00277       }
00278     } else { 
00279       numSiblingsNext = intersectSparseBitmatrix<EARLY_FILTERING, INTERSECTION_ROWSWAPPING, PRUNE_EQUISUPPORT_EXTENSIONS>(*pti, i, *bmOut, minSup, *eeIterator);
00280       if (numSiblingsNext > 0) 
00281         depthFirstWalk(); 
00282     }
00283     
00284     
00285     assert(i >= 0 && i < pti->numRows);
00286     if (OUTPUT_HINTS) out.hint(string("d"));
00287     out.write(EXTENSION_DESCRIPTION_DEFECTS  
00288               ? minSup+maxdefect-pti->rowLength(i)
00289               : pti->rowLength(i)); 
00290     out.popItem();
00291   }
00292   
00293   
00294   int i = numSiblings-1;
00295   out.pushItem(pti->rowLabel(i)); 
00296   if (OUTPUT_HINTS) out.hint(string("e"));
00297     out.write(EXTENSION_DESCRIPTION_DEFECTS 
00298               ? minSup+maxdefect-pti->rowLength(i)
00299               : pti->rowLength(i)); 
00300   out.popItem();
00301 
00302   
00303   if (EXTENSION_DESCRIPTION_DEFECTS && bmOut != NULL) { 
00304     delete bmOut;  
00305     ptis[depth] = NULL;
00306   }
00307   
00308   depth--;
00309 }
00310 
00311 
00312 #endif