Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

trieSizeInfluence.cpp

Go to the documentation of this file.
00001 #include "common.hpp"
00002 #include "common/log.h"
00003 #include "common/allocators.hpp"
00004 #include "io/input/transaction_reader/LBufferedTransactionReader.hpp"
00005 #include "io/input/transaction_reader/SortedTransactionReader.hpp" 
00006 
00007 #include "io/codec/coder/Coder.hpp"
00008 #include "io/codec/decoder/df/SimpleDFDecoder.hpp"
00009 
00010 #include "io/db_cache/RBTreeDBCache.hpp"
00011 
00012 #include "util/StreamParser.hpp"
00013 #include "util/FrequentFilter.cpp"
00014 #include "util/Frequent2Filter.cpp"
00015 
00016 #include "test/apriori/bodon/FattenableLeaf.hpp"
00017 #include "apriori/bodon/Trie.hpp"
00018 #include "datastructures/trie/edgelist/OrderedEdgelist.hpp"
00019 
00020 #include "apriori/bodon/trie/trie_manipulators/FrequentItemInserter.hpp"
00021 #include "apriori/bodon/trie/trie_manipulators/FrequentPairInserter.hpp"
00022 #include "apriori/OneByOneSupportCounter.hpp"
00023 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterMerge.hpp"
00024 #include "apriori/bodon/trie/trie_manipulators/SimplePruner.hpp"
00025 #include "apriori/bodon/trie/trie_manipulators/CandidateGeneratorPrune.hpp"
00026 #include "apriori/bodon/trie/trie_manipulators/InfreqRemover.hpp"
00027 
00028 #include "apriori/Apriori.hpp"
00029 
00030 #include <vector>
00031 #include <iostream>
00032 #include <string>
00033 
00034 
/// Human-readable description of the transaction file format.
/// Empty until init() has been called; printed by usage().
std::string file_format;

/**
 * Fills the global file_format with the description of the expected
 * transaction file layout.
 *
 * The text is assigned (not appended), so calling init() more than once
 * leaves file_format unchanged.
 */
void init()
{
   // Adjacent literals are concatenated by the compiler, so the whole help
   // text is built with a single assignment.
   file_format =
      "File format:"
      "\n\nThe transactionfile is a plain text file. Each row "
      "represents a transaction. \n"
      "A transaction is a set of items separated by a nonnumeric "
      "character.\nIt can be for example a white space, comma, "
      "colon, etc.\n"
      "Items are nonnegative integers.\n";
}
00048 void usage()
00049 {
00050    log_err(0,"Usage: apriori-simple transactionfile min_supp outcomefile [options]");
00051    log_err(0," transactionfile\t    file, that contains the tranasctions of items");
00052    log_err(0," outcomefile\t    file to write the outcome");
00053    log_err(0," min_supp\t    absolute support threshold");
00054 
00055    std::cerr << file_format;
00056    log_err(0,"\t\t\tHave a succesful mining ;-)\n\n");
00057 }
00058 
00069 int process_arguments( int argc, char *argv[], counter_t& min_supp, 
00070                        bool &isrel, double &relminsupp )
00071 {
00072    if ( argc < 5 )
00073    {
00074      log_err(0,"There are 4 mandatory arguments.");
00075      return 2;
00076    }
00077    std::string mins=argv[2];
00078    if (mins[mins.size()-1]=='%') {
00079      mins.erase(mins.size()-1);
00080      isrel=true;
00081      relminsupp=atof(mins.c_str());
00082      relminsupp/=100;
00083      log_info(0,"Using relative minimum support of %lg",relminsupp);
00084      return 0;
00085    }
00086    isrel=false;
00087    int min_supp_i;
00088    try
00089    {
00090       convert(argv[2], min_supp_i);
00091       if ( min_supp_i <= 0  )
00092       {
00093          log_err(0,"%s cannot be converted to a positive integer.",argv[3]);
00094          return 3;
00095       }
00096    }
00097    catch(BadConversion e)
00098    {
00099       log_err(0,"min_supp conversion problem.");
00100       return 3;
00101    }
00102    min_supp = static_cast<counter_t>(min_supp_i);
00103    log_info(0,"min_supp is set to %d", min_supp);
00104    return 0;
00105 }
00106 template<class LEAF, class T_R, class DF_D> void 
00107 helperFunction(
00108    T_R& tr_reader, DF_D& df_decoder, counter_t nr_of_transactions,
00109    std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00110    char* input_file, counter_t min_supp)
00111 {
00112    typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelist<std::vector<Edge> > > TRIE;
00113    TRIE main_trie;
00114    typedef SortedTransactionReader< Coder<T_R, DF_D> > S_C_T_R;
00115    typedef Bodon::RBTreeDBCache<S_C_T_R, std::vector<item_t> > S_C;
00116    typename S_C::params_t par_c;
00117    par_c.file_name = input_file;
00118    par_c.mode=FileReprBase::READ;
00119    par_c.largest_item = tr_reader.getLargestItem();
00120    par_c.decoder = &df_decoder;
00121    par_c.freq_items_with_counters = &freq_items_with_counters;
00122    par_c.codemode = ASC;
00123    log_status(0,"Doing sorted codec.");
00124    S_C sorted_coder(&par_c);
00125    
00126    std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00127       freq_pairs_with_counters;
00128    Frequent2Filter<S_C> fr_2_filter(
00129       &sorted_coder );
00130 //   Frequent2FilterOnline<S_C> fr_2_filter(
00131 //      &sorted_coder );
00132    log_status(0,"Finding frequent pairs.")
00133       fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp);
00134    
00135    const NEELevel NEE =  NEE_Off;
00136    typedef NewWrapperAlloc<TRIE> TRIE_ALLOCATOR;
00137    TRIE_ALLOCATOR s_alloc;
00138    typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00139    FII fii(main_trie, df_decoder);
00140    typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00141    typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00142 
00143    typedef Bodon::FrequentPairInserter<DF_D, TRIE, TRIE, TRIE_ALLOCATOR, NEE> FPI;
00144    typedef Bodon::trie::SimplePruner<DF_D, TRIE, NewWrapperAlloc<TRIE>, NEE> PRUNER;
00145    typedef Bodon::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, TRIE_ALLOCATOR, NEE> CG;
00146    typedef Bodon::trie::InfreqRemover<DF_D, TRIE, TRIE_ALLOCATOR, NEE> IR;
00147    IR infrequent_remover(main_trie, df_decoder, s_alloc);
00148    typedef Apriori<S_C, DF_D, TRIE, TRIE_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00149    A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00150    log_status(0,"Finding frequent itemsets.")
00151       apriori.findFrequentItemsets( 
00152          nr_of_transactions, *par_c.freq_counters,
00153          freq_pairs_with_counters, min_supp );
00154 }
00155 
00156 
00157 int main( int argc, char *argv[] )
00158 {
00159    init();
00160    counter_t min_supp;
00161    bool relative;
00162    double relminsupp;
00163    
00164    {
00165       int return_val = 
00166          process_arguments( argc, argv, min_supp, relative, relminsupp );
00167       if(return_val)
00168          return return_val;
00169    }
00170 
00171    char* input_file = argv[1];
00172    char* output_file = argv[3];
00173    
00174    try
00175    {
00176       typedef LBufferedTransactionReader< > T_R;
00177       
00178       T_R::params_t par_i;
00179       par_i.file_name = input_file;
00180       par_i.mode=FileReprBase::READ;
00181       par_i.file_buffer_size = 16 * 1024;
00182       T_R tr_reader(&par_i);
00183       std::vector< std::pair<counter_t, item_t> > freq_items_with_counters;
00184       counter_t nr_of_transactions;
00185       // The first step of each algorithms is determining the frequent items.
00186       FrequentFilter<T_R>
00187          fr_filter(tr_reader);
00188       log_status(0,"Finding frequent items.");
00189       fr_filter.findFrequentItems( freq_items_with_counters,  
00190                                    nr_of_transactions, min_supp);
00191       
00192       if(!freq_items_with_counters.empty())
00193       {
00194          log_status(0,"Doing decoder.");
00195          typedef SimpleDFDecoder< > DF_D;
00196 
00197          DF_D::params_t par_d;
00198          par_d.file_name = output_file;
00199          par_d.mode=FileReprBase::WRITE;
00200          DF_D df_decoder(&par_d);
00201 
00202          if(strcmp(argv[4], "0") == 0 )
00203          {
00204             log_status(0,"No vector is added to the nodes")
00205             typedef Bodon::Leaf LEAF;
00206             helperFunction<LEAF, T_R, DF_D>(
00207                tr_reader, df_decoder, nr_of_transactions,
00208                freq_items_with_counters, input_file, min_supp );
00209          }
00210          else if(strcmp(argv[4], "1") ==0 )
00211          {
00212             log_status(0,"A vector of size 1 is added to each node")
00213             typedef Bodon::FattenableLeaf<1> LEAF;
00214             helperFunction<LEAF, T_R, DF_D>(
00215                tr_reader, df_decoder, nr_of_transactions,
00216                freq_items_with_counters, input_file, min_supp );
00217          }
00218          else if(strcmp(argv[4], "2") ==0 )
00219          {
00220             log_status(0,"A vector of size 2 is added to each node")
00221             typedef Bodon::FattenableLeaf<2> LEAF;
00222             helperFunction<LEAF, T_R, DF_D>(
00223                tr_reader, df_decoder, nr_of_transactions,
00224                freq_items_with_counters, input_file, min_supp );
00225          }
00226          else if(strcmp(argv[4], "4") ==0 )
00227          {
00228             log_status(0,"A vector of size 4 is added to each node")
00229             typedef Bodon::FattenableLeaf<4> LEAF;
00230             helperFunction<LEAF, T_R, DF_D>(
00231                tr_reader, df_decoder, nr_of_transactions,
00232                freq_items_with_counters, input_file, min_supp );
00233          }
00234          else if(strcmp(argv[4], "6") ==0 )
00235          {
00236             log_status(0,"A vector of size 6 is added to each node")
00237             typedef Bodon::FattenableLeaf<6> LEAF;
00238             helperFunction<LEAF, T_R, DF_D>(
00239                tr_reader, df_decoder, nr_of_transactions,
00240                freq_items_with_counters, input_file, min_supp );
00241          }
00242          else if(strcmp(argv[4], "8") ==0 )
00243          {
00244             log_status(0,"A vector of size 8 is added to each node")
00245             typedef Bodon::FattenableLeaf<8> LEAF;
00246             helperFunction<LEAF, T_R, DF_D>(
00247                tr_reader, df_decoder, nr_of_transactions,
00248                freq_items_with_counters, input_file, min_supp );
00249          }
00250          else if(strcmp(argv[4], "10") ==0 )
00251          {
00252             log_status(0,"A vector of size 10 is added to each node")
00253             typedef Bodon::FattenableLeaf<10> LEAF;
00254             helperFunction<LEAF, T_R, DF_D>(
00255                tr_reader, df_decoder, nr_of_transactions,
00256                freq_items_with_counters, input_file, min_supp );
00257          }
00258          else if(strcmp(argv[4], "11") ==0 )
00259          {
00260             log_status(0,"A vector of size 11 is added to each node")
00261             typedef Bodon::FattenableLeaf<11> LEAF;
00262             helperFunction<LEAF, T_R, DF_D>(
00263                tr_reader, df_decoder, nr_of_transactions,
00264                freq_items_with_counters, input_file, min_supp );
00265          }
00266          else if(strcmp(argv[4], "12") ==0 )
00267          {
00268             log_status(0,"A vector of size 12 is added to each node")
00269             typedef Bodon::FattenableLeaf<12> LEAF;
00270             helperFunction<LEAF, T_R, DF_D>(
00271                tr_reader, df_decoder, nr_of_transactions,
00272                freq_items_with_counters, input_file, min_supp );
00273          }
00274          else if(strcmp(argv[4], "13") ==0 )
00275          {
00276             log_status(0,"A vector of size 13 is added to each node")
00277             typedef Bodon::FattenableLeaf<13> LEAF;
00278             helperFunction<LEAF, T_R, DF_D>(
00279                tr_reader, df_decoder, nr_of_transactions,
00280                freq_items_with_counters, input_file, min_supp );
00281          }
00282          else if(strcmp(argv[4], "14") ==0 )
00283          {
00284             log_status(0,"A vector of size 14 is added to each node")
00285             typedef Bodon::FattenableLeaf<14> LEAF;
00286             helperFunction<LEAF, T_R, DF_D>(
00287                tr_reader, df_decoder, nr_of_transactions,
00288                freq_items_with_counters, input_file, min_supp );
00289          }
00290          else if(strcmp(argv[4], "15") ==0 )
00291          {
00292             log_status(0,"A vector of size 15 is added to each node")
00293             typedef Bodon::FattenableLeaf<15> LEAF;
00294             helperFunction<LEAF, T_R, DF_D>(
00295                tr_reader, df_decoder, nr_of_transactions,
00296                freq_items_with_counters, input_file, min_supp );
00297          }
00298          else if(strcmp(argv[4], "16") ==0 )
00299          {
00300             log_status(0,"A vector of size 16 is added to each node")
00301             typedef Bodon::FattenableLeaf<16> LEAF;
00302             helperFunction<LEAF, T_R, DF_D>(
00303                tr_reader, df_decoder, nr_of_transactions,
00304                freq_items_with_counters, input_file, min_supp );
00305          }
00306          else if(strcmp(argv[4], "17") ==0 )
00307          {
00308             log_status(0,"A vector of size 17 is added to each node")
00309             typedef Bodon::FattenableLeaf<17> LEAF;
00310             helperFunction<LEAF, T_R, DF_D>(
00311                tr_reader, df_decoder, nr_of_transactions,
00312                freq_items_with_counters, input_file, min_supp );
00313          }
00314          else if(strcmp(argv[4], "18") ==0 )
00315          {
00316             log_status(0,"A vector of size 18 is added to each node")
00317             typedef Bodon::FattenableLeaf<18> LEAF;
00318             helperFunction<LEAF, T_R, DF_D>(
00319                tr_reader, df_decoder, nr_of_transactions,
00320                freq_items_with_counters, input_file, min_supp );
00321          }
00322          else if(strcmp(argv[4], "19") ==0 )
00323          {
00324             log_status(0,"A vector of size 19 is added to each node")
00325             typedef Bodon::FattenableLeaf<19> LEAF;
00326             helperFunction<LEAF, T_R, DF_D>(
00327                tr_reader, df_decoder, nr_of_transactions,
00328                freq_items_with_counters, input_file, min_supp );
00329          }
00330          else if(strcmp(argv[4], "20") ==0 )
00331          {
00332             log_status(0,"A vector of size 20 is added to each node")
00333             typedef Bodon::FattenableLeaf<20> LEAF;
00334             helperFunction<LEAF, T_R, DF_D>(
00335                tr_reader, df_decoder, nr_of_transactions,
00336                freq_items_with_counters, input_file, min_supp );
00337          }
00338          else if(strcmp(argv[4], "21") ==0 )
00339          {
00340             log_status(0,"A vector of size 21 is added to each node")
00341             typedef Bodon::FattenableLeaf<21> LEAF;
00342             helperFunction<LEAF, T_R, DF_D>(
00343                tr_reader, df_decoder, nr_of_transactions,
00344                freq_items_with_counters, input_file, min_supp );
00345          }
00346          else if(strcmp(argv[4], "22") ==0 )
00347          {
00348             log_status(0,"A vector of size 22 is added to each node")
00349             typedef Bodon::FattenableLeaf<22> LEAF;
00350             helperFunction<LEAF, T_R, DF_D>(
00351                tr_reader, df_decoder, nr_of_transactions,
00352                freq_items_with_counters, input_file, min_supp );
00353          }
00354          else if(strcmp(argv[4], "23") ==0 )
00355          {
00356             log_status(0,"A vector of size 23 is added to each node")
00357             typedef Bodon::FattenableLeaf<23> LEAF;
00358             helperFunction<LEAF, T_R, DF_D>(
00359                tr_reader, df_decoder, nr_of_transactions,
00360                freq_items_with_counters, input_file, min_supp );
00361          }
00362          else if(strcmp(argv[4], "24") ==0 )
00363          {
00364             log_status(0,"A vector of size 24 is added to each node")
00365             typedef Bodon::FattenableLeaf<24> LEAF;
00366             helperFunction<LEAF, T_R, DF_D>(
00367                tr_reader, df_decoder, nr_of_transactions,
00368                freq_items_with_counters, input_file, min_supp );
00369          }
00370          else if(strcmp(argv[4], "26") ==0 )
00371          {
00372             log_status(0,"A vector of size 26 is added to each node")
00373             typedef Bodon::FattenableLeaf<26> LEAF;
00374             helperFunction<LEAF, T_R, DF_D>(
00375                tr_reader, df_decoder, nr_of_transactions,
00376                freq_items_with_counters, input_file, min_supp );
00377          }
00378          else if(strcmp(argv[4], "27") ==0 )
00379          {
00380             log_status(0,"A vector of size 27 is added to each node")
00381             typedef Bodon::FattenableLeaf<27> LEAF;
00382             helperFunction<LEAF, T_R, DF_D>(
00383                tr_reader, df_decoder, nr_of_transactions,
00384                freq_items_with_counters, input_file, min_supp );
00385          }
00386          else if(strcmp(argv[4], "28") ==0 )
00387          {
00388             log_status(0,"A vector of size 28 is added to each node")
00389             typedef Bodon::FattenableLeaf<28> LEAF;
00390             helperFunction<LEAF, T_R, DF_D>(
00391                tr_reader, df_decoder, nr_of_transactions,
00392                freq_items_with_counters, input_file, min_supp );
00393          }
00394          else if(strcmp(argv[4], "30") ==0 )
00395          {
00396             log_status(0,"A vector of size 30 is added to each node")
00397             typedef Bodon::FattenableLeaf<30> LEAF;
00398             helperFunction<LEAF, T_R, DF_D>(
00399                tr_reader, df_decoder, nr_of_transactions,
00400                freq_items_with_counters, input_file, min_supp );
00401          }
00402          else if(strcmp(argv[4], "31") ==0 )
00403          {
00404             log_status(0,"A vector of size 31 is added to each node")
00405             typedef Bodon::FattenableLeaf<31> LEAF;
00406             helperFunction<LEAF, T_R, DF_D>(
00407                tr_reader, df_decoder, nr_of_transactions,
00408                freq_items_with_counters, input_file, min_supp );
00409          }
00410          else if(strcmp(argv[4], "32") ==0 )
00411          {
00412             log_status(0,"A vector of size 32 is added to each node")
00413             typedef Bodon::FattenableLeaf<32> LEAF;
00414             helperFunction<LEAF, T_R, DF_D>(
00415                tr_reader, df_decoder, nr_of_transactions,
00416                freq_items_with_counters, input_file, min_supp );
00417          }
00418          else if(strcmp(argv[4], "34") ==0 )
00419          {
00420             log_status(0,"A vector of size 34 is added to each node")
00421             typedef Bodon::FattenableLeaf<34> LEAF;
00422             helperFunction<LEAF, T_R, DF_D>(
00423                tr_reader, df_decoder, nr_of_transactions,
00424                freq_items_with_counters, input_file, min_supp );
00425          }
00426          else if(strcmp(argv[4], "36") ==0 )
00427          {
00428             log_status(0,"A vector of size 36 is added to each node")
00429             typedef Bodon::FattenableLeaf<36> LEAF;
00430             helperFunction<LEAF, T_R, DF_D>(
00431                tr_reader, df_decoder, nr_of_transactions,
00432                freq_items_with_counters, input_file, min_supp );
00433          }
00434          else if(strcmp(argv[4], "40") ==0 )
00435          {
00436             log_status(0,"A vector of size 40 is added to each node")
00437             typedef Bodon::FattenableLeaf<40> LEAF;
00438             helperFunction<LEAF, T_R, DF_D>(
00439                tr_reader, df_decoder, nr_of_transactions,
00440                freq_items_with_counters, input_file, min_supp );
00441          }
00442          else if(strcmp(argv[4], "50") ==0 )
00443          {
00444             log_status(0,"A vector of size 50 is added to each node")
00445             typedef Bodon::FattenableLeaf<50> LEAF;
00446             helperFunction<LEAF, T_R, DF_D>(
00447                tr_reader, df_decoder, nr_of_transactions,
00448                freq_items_with_counters, input_file, min_supp );
00449          }
00450       }
00451    }
00452    catch (std::ios_base::failure e)
00453    {
00454       log_err(0,"Exiting the program due to IO exception");
00455       return 1;
00456    }
00457 }
00458 
00459 

Generated on Sun Sep 17 17:50:40 2006 for FIM environment by  doxygen 1.4.4