Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

tr_cache.cpp

Go to the documentation of this file.
00001 
00002 #include "common.hpp"
00003 #include "common/log.h"
00004 
00005 #include "io/input/transaction_reader/brBufferedTransactionReader.hpp"
00006 
00007 #include "io/codec/decoder/df/SimpleDFDecoder.hpp"
00008 //#include "io/codec/decoder/df/CacheDFDecoder.hpp"
00009 #include "io/codec/decoder/df/DFDecoderWithEEManagement.hpp"
00010 //#include "io/output/StatOutput.hpp"
00011 #include "io/output/normal/SortOutput.hpp"
00012 //#include "io/codec/decoder/df/DFDecoderWrapper.hpp"
00013 
00014 #include "util/StreamParser.hpp"
00015 #include "util/FrequentFilter.cpp"
00016 
00017 #include "datastructures/maxvector.hpp"
00018 #include "datastructures/trie/edgelist/OrderedEdgelist.hpp"
00019 //#include "datastructures/trie/edgelist/OrderedEdgelistDynLookup.hpp"
00020 #include "datastructures/trie/edgelist/OffsetIndexVector.hpp"
00021 
00022 
00023 #include "apriori/bodon/Leaf.hpp"
00024 #include "apriori/bodon/Trie.hpp"
00025 #include "apriori/bodon/TrieNEE.hpp"
00026 
00027 #include "io/codec/coder/Coder.hpp"
00028 
00029 #include "test/apriori/bodon/AprioriSelector2.hpp"
00030 
00031 #include <vector>
00032 #include <iostream>
00033 #include <string>
00034 
/// Human-readable description of the expected transaction file layout.
/// Empty until init() has been called; printed by usage().
std::string file_format;

/**
 * Fills the global file_format string with the description of the
 * transaction file layout.
 *
 * The text is assigned (not appended), so calling init() more than once
 * leaves file_format with the same content.
 */
void init()
{
   // Adjacent string literals are concatenated by the compiler, so the
   // whole help text is stored with a single assignment.
   file_format =
      "File format:"
      "\n\nThe transactionfile is a plan text file. Each row "
      "represents a transaction. "
      "A transaction is a set of items seperated by a nonnumeric "
      "character.\nIt can be for example a white space, comma, "
      "colon, etc.\n"
      "Items are nonnegative integers.\n";
}
00048 void usage()
00049 {
00050    cerr<< "Usage: tr_cache tr_cache_option transactionfile min_supp outcomefile ordering_option nee_option [options]\n\n";
00051    cerr<<" tr_cache_option  option of transaction caching, \n\t\t  i.e: off, patricia, rb-tree-simul,";
00052    cerr<<" \n\t\t  rb-tree-insert-clear-simple, rb-tree-insert-clear-pro \n";
00053    cerr<<" transactionfile  file, that contains the tranasctions of items\n";
00054    cerr<<" min_supp\t  absolute support threshold\n";
00055    cerr<<" outcomefile\t  file to write the outcome\n";
00056    cerr<<" ordering_option  the ordering to be used, i.e: ASC, DESC\n";
00057    cerr<<" nee_option\t  equisupport pruning option, i.e: off, level3\n";
00058 
00059    std::cerr << file_format;
00060    cerr<<"\t\t\tHave a succesful mining ;-)\n\n";
00061 }
00062 
00074 int process_arguments( int argc, char *argv[], counter_t& min_supp, 
00075                        bool &isrel, double &relminsupp, unsigned int& maxsize )
00076 {
00077    if ( argc < 7 )
00078    {
00079      log_err(0,"There are 6 mandatory arguments.");
00080      usage();
00081      return 2;
00082    }
00083    std::string mins=argv[3];
00084    if (mins[mins.size()-1]=='%') {
00085      mins.erase(mins.size()-1);
00086      isrel=true;
00087      relminsupp=atof(mins.c_str());
00088      relminsupp/=100;
00089      log_info(0,"Using relative minimum support of %lg",relminsupp);
00090      return 0;
00091    }
00092    isrel=false; relminsupp=0;
00093    int min_supp_i;
00094    try
00095    {
00096       convert(argv[3], min_supp_i);
00097       if ( min_supp_i <= 0  )
00098       {
00099          log_err(0,"%s cannot be converted to a positive integer.",argv[3]);
00100          return 3;
00101       }
00102    }
00103    catch(BadConversion e)
00104    {
00105       log_err(0,"min_supp conversion problem.");
00106       return 3;
00107    }
00108    min_supp = static_cast<counter_t>(min_supp_i);
00109    log_info(0,"min_supp is set to %d", min_supp);
00110    if(argc == 8)
00111    {
00112       int maxsize_i;
00113       try
00114       {
00115          convert(argv[7], maxsize_i);
00116          if ( maxsize_i <= 0  )
00117          {
00118             log_err(0,"%s cannot be converted to a positive integer.",argv[6]);
00119             return 4;
00120          }
00121       }
00122       catch(BadConversion e)
00123       {
00124          log_err(0,"max_size conversion problem.");
00125          return 4;
00126       }
00127       maxsize = static_cast<unsigned int>(maxsize_i);
00128       log_status(0,"maxsize is set to %d", maxsize);
00129    }
00130    else
00131       maxsize = largest_itemsetsize;
00132    return 0;
00133 }
00134 
00135 int main( int argc, char *argv[] )
00136 {
00137    init();
00138    counter_t min_supp;
00139    unsigned int maxsize;
00140    bool relative;
00141    double relminsupp;
00142       
00143    {
00144       int return_val = 
00145          process_arguments( argc, argv, min_supp, 
00146                             relative, relminsupp, maxsize );
00147       if(return_val)
00148          return return_val;
00149    }
00150 
00151    char *trcache_option=argv[1];
00152    char* input_file = argv[2];
00153    char* output_file = argv[4];
00154    char* ordering_option = argv[5];
00155    char* nee_option = argv[6];
00156 
00157    try
00158    {
00159       // We assume that the transactions does not contain duplicates!!!
00160       typedef brBufferedTransactionReader< > T_R;
00161       // Otherwise uncomment this:
00162       // typedef SortedTransactionReader<brBufferedTransactionReader< >, true> T_R;
00163 
00164       T_R::params_t par_i;
00165       par_i.file_name = input_file;
00166       par_i.mode=FileReprBase::READ;
00167       par_i.file_buffer_size = 16 * 1024;
00168       T_R tr_reader(&par_i);
00169       std::vector< std::pair<counter_t, item_t> > freq_items_with_counters;
00170       counter_t nr_of_transactions;
00171       // The first step of each algorithms is determining the frequent items.
00172       FrequentFilter<T_R>
00173          fr_filter(tr_reader);
00174       log_status(0,"Finding frequent items.");
00175       fr_filter.findFrequentItems( freq_items_with_counters,  
00176                                    nr_of_transactions, min_supp);
00177 
00178       if(!freq_items_with_counters.empty())
00179       {
00180          log_status(0,"Doing decoder.");
00181          typedef DFDecoderWithEEManagement< > DF_D;
00182          DF_D::params_t par_d;
00183          par_d.file_name = output_file;
00184          par_d.mode=FileReprBase::WRITE;
00185 //          par_d.numfreq = freq_items_with_counters.size(); // If StatOutput is used!!!
00186          DF_D df_decoder(&par_d);
00187 
00188          log_status(0,"APRIORI is selected");
00189          log_status(0,"deadend pruning option is on.");
00190          typedef Bodon::LeafWithoutConstructor LEAF_WC;  
00191          typedef Bodon::Leaf LEAF;       
00192 
00193          typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelist<std::vector<Edge> > > TRIE_OEL;
00194          typedef Bodon::Trie< LEAF, Bodon::OffsetIndexVector< std::vector<void*> > > TRIE_OI;
00195 
00196          if(strstr( nee_option,"off"))
00197          {
00198             const NEELevel NEE = NEE_Off;                
00199             log_status(0,"Equisupport pruning is disabled");
00200             TRIE_OEL main_trie;
00201             AprioriSelector2<TRIE_OEL, TRIE_OI, LEAF_WC, T_R, DF_D, NEE>(
00202                min_supp, input_file, nr_of_transactions, 
00203                freq_items_with_counters, tr_reader, df_decoder,
00204                main_trie, trcache_option, ordering_option, maxsize);
00205          }
00206          else
00207          {
00208             const NEELevel NEE = NEE_Full;               
00209             log_status(0,"Level 3 equisupport pruning is enabled");
00210             typedef Bodon::TrieNEE<TRIE_OEL> TRIENEE_OEL;
00211             typedef Bodon::TrieNEE<TRIE_OI> TRIENEE_OI;
00212             TRIENEE_OEL main_trie;
00213             AprioriSelector2<TRIENEE_OEL, TRIENEE_OI, LEAF_WC, T_R, DF_D, NEE>(
00214                min_supp, input_file, nr_of_transactions, 
00215                freq_items_with_counters, tr_reader, df_decoder,
00216                main_trie, trcache_option, ordering_option, maxsize);
00217          }
00218       }
00219    }
00220    catch (std::ios_base::failure e)
00221    {
00222       log_err(0,"Exiting the program due to IO exception");
00223       return 1;
00224    }
00225 }
00226 
00227 

Generated on Sun Sep 17 17:50:40 2006 for FIM environment by  doxygen 1.4.4