Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

main.cpp

Go to the documentation of this file.
00001 
00028 #include "common.hpp"
00029 #include "common/log.h"
00030 #include "io/input/transaction_reader/brBufferedTransactionReader.hpp"
00031 #include "io/input/transaction_reader/SortedTransactionReader.hpp" 
00032 
00033 
00034 #include "io/codec/decoder/df/SimpleDFDecoder.hpp"
00035 //#include "io/codec/decoder/df/CacheDFDecoder.hpp"
00036 #include "io/codec/decoder/df/DFDecoderWithEEManagement.hpp"
00037 //#include "io/output/StatOutput.hpp"
00038 #include "io/output/normal/SortOutput.hpp"
00039 //#include "io/codec/decoder/df/DFDecoderWrapper.hpp"
00040 
00041 #include "util/StreamParser.hpp"
00042 #include "util/FrequentFilter.cpp"
00043 
00044 #include "datastructures/maxvector.hpp"
00045 #include "datastructures/trie/edgelist/OrderedEdgelist.hpp"
00046 //#include "datastructures/trie/edgelist/OrderedEdgelistDynLookup.hpp"
00047 #include "datastructures/trie/edgelist/OffsetIndexVector.hpp"
00048 
00049 #include "apriori/bodon/Leaf.hpp"
00050 #include "apriori/bodon/Trie.hpp"
00051 #include "apriori/bodon/TrieNEE.hpp"
00052 #include "io/input/transaction_reader/OrderReverser.hpp"
00053 #include "io/codec/coder/Coder.hpp"
00054 #include "io/db_cache/BuildTreeDBCache.hpp"
00055 #include "util/Frequent2Filter.cpp"
00056 #include "util/Frequent2FilterOnline.cpp"
00057 #include "apriori/OneByOneSupportCounter.hpp"
00058 #include "apriori/bodon/dynamic_trie/trie_manipulators/SupportCounter.hpp"
00059 #include "apriori/AprioriSelector.hpp"
00060 #include "fpgrowth/FPGrowthSelector.hpp"
00061 #include <vector>
00062 #include <iostream>
00063 #include <string>
00064 
00065 
/// Human-readable description of the transaction file format.
/// Empty until init() has been called; printed by usage().
std::string file_format;

/**
 * Fills the global file_format with the help text that describes the
 * expected layout of the transaction file.
 *
 * The text is assigned (not appended), so calling init() more than once
 * leaves file_format with the same content.
 */
void init()
{
   // Adjacent literals are concatenated by the compiler, so the whole
   // text is built with a single assignment.
   file_format =
      "File format:"
      "\n\nThe transactionfile is a plain text file. Each row "
      "represents a transaction. \n"
      "A transaction is a set of items separated by a nonnumeric "
      "character.\nIt can be for example a white space, comma, "
      "colon, etc.\n"
      "Items are nonnegative integers.\n";
}
00079 void usage()
00080 {
00081    std::cerr<<"\nUsage: fim algorithm transactionfile min_supp outcomefile [maxsize]\n";
00082    std::cerr<<" algorithm\t    the name of the algorithm, i.e: apriori, eclat or fp-growth\n";
00083    std::cerr<<" transactionfile\t    file, that contains the tranasctions of items\n";
00084    std::cerr<<" outcomefile\t    file to write the outcome\n";
00085    std::cerr<<" min_supp\t    absolute support threshold\n";
00086    std::cerr<<" maxsize\t    the upper limit of the size of the frequent sets\n";
00087 
00088    std::cerr << file_format;
00089    std::cerr<<"\n\t\t\tHave a succesful mining ;-)\n\n";
00090 }
00091 
00103 int process_arguments( int argc, char *argv[], counter_t& min_supp, 
00104                        bool &isrel, double &relminsupp, unsigned int& maxsize )
00105 {
00106    if ( argc < 5 )
00107    {
00108      log_err(0,"There are 4 mandatory arguments!");
00109      usage();
00110      return 2;
00111    }
00112    std::string mins=argv[3];
00113    if (mins[mins.size()-1]=='%') {
00114      mins.erase(mins.size()-1);
00115      isrel=true;
00116      relminsupp=atof(mins.c_str());
00117      relminsupp/=100;
00118      log_info(0,"Using relative minimum support of %lg",relminsupp);
00119      return 0;
00120    }
00121    isrel=false; relminsupp=0;
00122    int min_supp_i;
00123    try
00124    {
00125       convert(argv[3], min_supp_i);
00126       if ( min_supp_i <= 0  )
00127       {
00128          log_err(0,"%s cannot be converted to a positive integer.",argv[3]);
00129          return 3;
00130       }
00131    }
00132    catch(BadConversion e)
00133    {
00134       log_err(0,"min_supp conversion problem.");
00135       return 3;
00136    }
00137    min_supp = static_cast<counter_t>(min_supp_i);
00138    log_info(0,"min_supp is set to %d", min_supp);
00139    if(argc == 6)
00140    {
00141       int maxsize_i;
00142       try
00143       {
00144          convert(argv[5], maxsize_i);
00145          if ( maxsize_i <= 0  )
00146          {
00147             log_err(0,"%s cannot be converted to a positive integer.",argv[5]);
00148             return 4;
00149          }
00150       }
00151       catch(BadConversion e)
00152       {
00153          log_err(0,"max_size conversion problem.");
00154          return 4;
00155       }
00156       maxsize = static_cast<unsigned int>(maxsize_i);
00157       log_status(0,"maxsize is set to %d", maxsize);
00158    }
00159    else
00160       maxsize = largest_itemsetsize;
00161    return 0;
00162 }
00163 
00164 int main( int argc, char *argv[] )
00165 {
00166    init();
00167    counter_t min_supp;
00168    unsigned int maxsize;
00169    bool relative;
00170    double relminsupp;
00171       
00172    {
00173       int return_val = 
00174          process_arguments( argc, argv, min_supp, 
00175                             relative, relminsupp, maxsize );
00176       if(return_val)
00177          return return_val;
00178    }
00179 
00180    char* algorithm = argv[1];
00181    char* input_file = argv[2];
00182    char* output_file = argv[4];
00183 
00184    try
00185    {
00186       // We assume that the transactions does not contain duplicates!!!
00187       typedef brBufferedTransactionReader< > T_R;
00188       // Otherwise uncomment this:
00189       // typedef SortedTransactionReader<brBufferedTransactionReader< >, true> T_R;
00190 
00191       T_R::params_t par_i;
00192       par_i.file_name = input_file;
00193       par_i.mode=FileReprBase::READ;
00194       par_i.file_buffer_size = 16 * 1024;
00195       T_R tr_reader(&par_i);
00196       std::vector< std::pair<counter_t, item_t> > freq_items_with_counters;
00197       counter_t nr_of_transactions;
00198       // The first step of each algorithms is determining the frequent items.
00199       FrequentFilter<T_R>
00200          fr_filter(tr_reader);
00201       log_status(0,"Finding frequent items.");
00202       fr_filter.findFrequentItems( freq_items_with_counters,  
00203                                    nr_of_transactions, min_supp);
00204 
00205       if(!freq_items_with_counters.empty())
00206       {
00207          log_status(0,"Doing decoder.");
00208          typedef DFDecoderWithEEManagement< > DF_D;
00209 //          typedef DFDecoderWithEEManagement< SimpleDFDecoder<SortOutput<> > > DF_D;
00210 //          typedef DFDecoderWithEEManagement< StatOutput<CacheDFDecoder< >, true> > DF_D;
00211 
00212          DF_D::params_t par_d;
00213          par_d.file_name = output_file;
00214          par_d.mode=FileReprBase::WRITE;
00215 //          par_d.numfreq = freq_items_with_counters.size(); // If StatOutput is used!!!
00216          DF_D df_decoder(&par_d);
00217 
00218          if(strncmp(argv[1],"apriori",7) == 0)
00219          {
00220             log_status(0,"APRIORI is selected");
00221             typedef Bodon::LeafWithoutConstructor LEAF_WC;       
00222             typedef Bodon::Leaf LEAF;    
00223             typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelist<std::vector<Edge> > > TRIE_OEL;
00224 //             typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<std::vector<Edge>, 30 > > TRIE_OEL;
00225             typedef Bodon::Trie< LEAF, Bodon::OffsetIndexVector< std::vector<void*> > > TRIE_OI;
00226 
00227             typedef Bodon::TrieNEE<TRIE_OEL> TRIENEE_OEL;
00228             typedef Bodon::TrieNEE<TRIE_OI> TRIENEE_OI;
00229 
00230             const NEELevel NEE = NEE_Full;
00231             log_status(0,"NEE_FULL is enabled");
00232 /*          if(strstr(argv[1],"mergetries"))
00233             {
00234                log_status(0,"Mergetrie option is on.");
00235                typedef SortedTransactionReader< Coder<T_R, DF_D>, false> S_C_T_R;
00236                const bool ENDONLY = false;
00237                typedef bracz::BuildTreeDBCache< 
00238                   S_C_T_R, std::vector<item_t>, bracz::EndPatriciaBuildTree<ENDONLY>, ENDONLY, true > S_C;
00239                typedef Bodon::dynamic_trie::SupportCounterMergeTries<TRIE_OEL, TRIE_OI, S_C> SUPP_C;
00240                typedef Frequent2Filter<S_C> F2F;
00241                AprioriSelector<TRIENEE_OEL, TRIENEE_OI, LEAF_WC, S_C, F2F, SUPP_C, T_R, DF_D, NEE>( 
00242                   min_supp, algorithm, input_file, nr_of_transactions, 
00243                   freq_items_with_counters, tr_reader, df_decoder, maxsize);
00244 
00245             }
00246             else*/ if(strstr(argv[1],"lowmem"))
00247             {
00248                log_status(0,"Low memory need option is on.");
00249                typedef SortedTransactionReader<Coder<T_R, DF_D> >  S_C;
00250                typedef Bodon::dynamic_trie::SupportCounter<TRIE_OEL, TRIE_OI> SUPP_C_BASE;
00251                typedef OneByOneSupportCounter<TRIE_OEL, S_C, SUPP_C_BASE> SUPP_C;
00252                typedef Frequent2FilterOnline<S_C> F2F;
00253                AprioriSelector<TRIENEE_OEL, TRIENEE_OI, LEAF_WC, S_C, F2F, SUPP_C, T_R, DF_D, NEE>( 
00254                   min_supp, "apriori-intersect", input_file, nr_of_transactions, 
00255                   freq_items_with_counters, tr_reader, df_decoder, maxsize);
00256             }
00257             else 
00258             {
00259                typedef SortedTransactionReader< Coder<T_R, DF_D>, false, false > S_C_T_R;
00260                const bool ENDONLY = true;
00261                typedef OrderReverser< bracz::BuildTreeDBCache< 
00262                S_C_T_R, std::vector<item_t>, bracz::EndPatriciaBuildTree<ENDONLY>, ENDONLY > >S_C;
00263                typedef Bodon::dynamic_trie::SupportCounter<TRIE_OEL, TRIE_OI> SUPP_C_BASE;
00264                typedef OneByOneSupportCounter<TRIE_OEL, S_C, SUPP_C_BASE> SUPP_C;
00265                typedef Frequent2Filter<S_C> F2F;
00266                AprioriSelector<TRIENEE_OEL, TRIENEE_OI, LEAF_WC, S_C, 
00267                   F2F, SUPP_C, T_R, DF_D, NEE>( 
00268                   min_supp, algorithm, input_file, nr_of_transactions, 
00269                   freq_items_with_counters, tr_reader, df_decoder, maxsize);
00270             }
00271          }
00272          else if(strncmp(argv[1],"fp-growth",9) == 0)
00273             FPGrowthSelector<T_R, DF_D>(
00274                min_supp, relative, relminsupp, algorithm, input_file, 
00275                nr_of_transactions, freq_items_with_counters, tr_reader, df_decoder);
00276          else
00277          {
00278             usage();
00279             log_err(0,"algorithm should be either apriori, fp-growth or eclat!");
00280             return 1;
00281          }
00282       }
00283    }
00284    catch (std::ios_base::failure e)
00285    {
00286       log_err(0,"Exiting the program due to IO exception");
00287       return 1;
00288    }
00289 }
00290 
00291 

Generated on Sun Sep 17 17:50:39 2006 for FIM environment by  doxygen 1.4.4