Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

routing.cpp

Go to the documentation of this file.
00001 #include "common.hpp"
00002 #include "io/input/transaction_reader/brBufferedTransactionReader.hpp"
00003 #include "io/input/transaction_reader/SortedTransactionReader.hpp"
00004 
00005 #include "io/output/BufferedOutput.hpp"
00006 #include "io/codec/decoder/df/DFDecoderWithEEManagement.hpp"
00007 
00008 #include "util/StreamParser.hpp"
00009 #include "util/FrequentFilter.cpp"
00010 
00011 #include "datastructures/maxvector.hpp"
00012 #include "datastructures/trie/edgelist/OrderedEdgelist.hpp"
00013 //#include "datastructures/trie/edgelist/OrderedEdgelistDynLookup.hpp"
00014 #include "datastructures/trie/edgelist/OffsetIndexVector.hpp"
00015 //#include "datastructures/trie/edgelist/DoubleRepr.hpp"
00016 
00017 #include "apriori/bodon/Trie.hpp"
00018 
00019 #include "test/apriori/bodon/trie_manipulators/RoutingSelector.hpp"
00020 
00021 #include <vector>
00022 #include <iostream>
00023 #include <string>
00024 
00025 
/// Description of the transaction file layout; filled in by init() and
/// printed by usage().
std::string file_format;

/**
 * Fills file_format with the help text describing the input file layout.
 *
 * usage() prints file_format, so call this first; otherwise that part of
 * the help output is empty.
 */
void init()
{
   // Adjacent string literals are merged by the compiler, so this is a
   // single assignment of the complete text.
   file_format =
      "File format:"
      "\n\nThe transactionfile is a plan text file. Each row "
      "represents a transaction. \n"
      "A transaction is a set of items seperated by a nonnumeric "
      "character.\nIt can be for example a white space, comma, "
      "colon, etc.\n"
      "Items are nonnegative integers.\n";
}
00039 void usage()
00040 {
00041    std::cerr << "\nUsage: routing routing_strat edge_repr transactionfile min_supp ";
00042    std::cerr << "outcomefile [options]\n";
00043    std::cerr << "\n routing_strat\t  the routing strategy, i.e: ";
00044    std::cerr << "merge, merge2, merge3, lookup_edge,\n\t\t  lookup_edge_prev_mem, ";
00045    std::cerr << "bitvector, indexvector, lookup_tr, hybrid, default\n";
00046    std::cerr << " edge_repr\t  the representation of the edge, i.e: ";
00047    std::cerr << "ordered_list, \n\t\t  offsetindex, hybrid";
00048    std::cerr << "\n transactionfile  file, that contains the tranasctions of items";
00049    std::cerr << "\n min_supp\t  absolute support threshold";
00050    std::cerr << "\n outcomefile\t  file to write the outcome";
00051 
00052    std::cerr << file_format;
00053    std::cerr << "\n\nHave a succesful mining ;-)"<<std::endl<<std::endl;
00054 }
00055 
00066 int process_arguments( int argc, char *argv[], counter_t& min_supp, 
00067                        bool &isrel, double &relminsupp )
00068 {
00069    if ( argc < 6 )
00070    {
00071       usage();
00072       log_err(0,"There are 5 mandatory arguments.");
00073       return 2;
00074    }
00075    std::string mins=argv[4];
00076    if (mins[mins.size()-1]=='%') {
00077       mins.erase(mins.size()-1);
00078       isrel=true;
00079       relminsupp=atof(mins.c_str());
00080       relminsupp/=100;
00081       log_info(0,"Using relative minimum support of %lg",relminsupp);
00082       return 0;
00083    }
00084    isrel=false;
00085    int min_supp_i;
00086    try
00087    {
00088       convert(argv[4], min_supp_i);
00089       if ( min_supp_i <= 0  )
00090       {
00091          log_err(0,"%s cannot be converted to a positive integer.",argv[3]);
00092          return 3;
00093       }
00094    }
00095    catch(BadConversion e)
00096    {
00097       log_err(0,"min_supp conversion problem.");
00098       return 3;
00099    }
00100    min_supp = static_cast<counter_t>(min_supp_i);
00101    log_info(0,"min_supp is set to %d", min_supp);
00102    return 0;
00103 }
00104 
00105 int main( int argc, char *argv[] )
00106 {
00107    init();
00108    counter_t min_supp;
00109    bool relative;
00110    double relminsupp;
00111       
00112    {
00113       int return_val = 
00114          process_arguments( argc, argv, min_supp, relative, relminsupp );
00115       if(return_val)
00116          return return_val;
00117    }
00118 
00119    char* routing = argv[1];
00120    char* input_file = argv[3];
00121    char* output_file = argv[5];
00122 
00123    try
00124    {
00125       // We assume that the transactions does not contain duplicates!!!
00126       typedef brBufferedTransactionReader< > T_R;
00127       // Otherwise uncmment this:
00128       // typedef SortedTransactionReader<brBufferedTransactionReader< >, true> T_R;
00129 
00130       T_R::params_t par_i;
00131       par_i.file_name = input_file;
00132       par_i.mode=FileReprBase::READ;
00133       par_i.file_buffer_size = 16 * 1024;
00134       T_R tr_reader(&par_i);
00135       std::vector< std::pair<counter_t, item_t> > freq_items_with_counters;
00136       counter_t nr_of_transactions;
00137       // The first step of each algorithms is determining the frequent items.
00138       FrequentFilter<T_R>
00139          fr_filter(tr_reader);
00140       log_status(0,"Finding frequent items.");
00141       fr_filter.findFrequentItems( freq_items_with_counters,   
00142                                    nr_of_transactions, min_supp);
00143 
00144       log_status(0,"Doing decoder.");
00145       typedef DFDecoderWithEEManagement< > DF_D;
00146 
00147       DF_D::params_t par_d;
00148       par_d.file_name = output_file;
00149       par_d.mode=FileReprBase::WRITE;
00150       DF_D df_decoder(&par_d);
00151 
00152       typedef Bodon::LeafWithoutConstructor LEAF_WC;     
00153       typedef Bodon::Leaf LEAF;  
00154       typedef bracz::singleualloc<LEAF_WC, 1024> LEAF_ALLOCATOR;
00155 
00156       if(strcmp(argv[2],"ordered_list")==0)
00157       {
00158          log_info(0,"Ordered edgelist representation is selected");
00159          typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelist<std::vector<Edge> > > TRIE;
00160          RoutingSelector<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>( 
00161             min_supp, routing, input_file, nr_of_transactions, 
00162             freq_items_with_counters, tr_reader, df_decoder );
00163       }
00164       else if(strcmp(argv[2],"offsetindex")==0)
00165       {
00166          log_info(0,"Offsetindex edgelist representation is selected");
00167          typedef Bodon::Trie< LEAF, Bodon::OffsetIndexVector< std::vector<void*> > > TRIE;
00168          RoutingSelectorOffset<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>( 
00169             min_supp, routing, input_file, nr_of_transactions, 
00170             freq_items_with_counters, tr_reader, df_decoder );
00171       }
00172       else if(strcmp(argv[2],"hybrid")==0)
00173       {
00174          log_info(0,"Hybrid edgelist representation is selected");
00175          typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelist<std::vector<Edge> > > TRIE_OEL;
00176          typedef Bodon::Trie< LEAF, Bodon::OffsetIndexVector< std::vector<void*> > > TRIE_OI;
00177          RoutingSelectorHybrid<TRIE_OEL, TRIE_OI, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>( 
00178             min_supp, routing, input_file, nr_of_transactions, 
00179             freq_items_with_counters, tr_reader, df_decoder );
00180       }
00181 /*      else if(strcmp(argv[2],"double_repr")==0)
00182       {
00183          log_info(0,"Double edgelist representation is selected");
00184          typedef Bodon::Trie< LEAF, Bodon::DoubleRepr<maxvector<Edge>, maxvector<void*> > > TRIE;
00185          RoutingSelectorDouble<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>( 
00186             min_supp, routing, input_file, nr_of_transactions, 
00187             freq_items_with_counters, tr_reader, df_decoder );
00188       }
00189       else if(strstr(argv[2],"ordered_list_dyn"))
00190       {
00191          log_info(0,"Dynamic ordered edgelist representation is selected");
00192          char* threshold = argv[2] + strlen("ordered_list_dyn_");
00193          if(strcmp(threshold, "3") ==0)
00194          {
00195             log_info(0,"Threshold is set to: 3.");
00196             typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<maxvector<Edge>, 3> > TRIE;
00197             RoutingSelector<TRIE, LEAF, LEAF_ALLOCATOR, T_R, DF_D>( 
00198                min_supp, routing, input_file, nr_of_transactions, 
00199                freq_items_with_counters, tr_reader, df_decoder );
00200          }
00201          else if(strcmp(threshold, "5") ==0)
00202          {
00203             log_info(0,"Threshold is set to: 5.");
00204             typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<maxvector<Edge>, 5> > TRIE;
00205             RoutingSelector<TRIE, LEAF, LEAF_ALLOCATOR, T_R, DF_D>( 
00206                min_supp, routing, input_file, nr_of_transactions, 
00207                freq_items_with_counters, tr_reader, df_decoder );
00208          }
00209          else if(strcmp(threshold, "10") ==0)
00210          {
00211          log_info(0,"Threshold is set to: 10.");
00212             typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<maxvector<Edge>, 10> > TRIE;
00213             RoutingSelector<TRIE, LEAF, LEAF_ALLOCATOR, T_R, DF_D>( 
00214                min_supp, routing, input_file, nr_of_transactions, 
00215                freq_items_with_counters, tr_reader, df_decoder );
00216          }
00217          else if(strcmp(threshold, "15") ==0)
00218          {
00219          log_info(0,"Threshold is set to: 15.");
00220             typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<maxvector<Edge>, 15> > TRIE;
00221             RoutingSelector<TRIE, LEAF, LEAF_ALLOCATOR, T_R, DF_D>( 
00222                min_supp, routing, input_file, nr_of_transactions, 
00223                freq_items_with_counters, tr_reader, df_decoder );
00224          }
00225          else if(strcmp(threshold, "20") ==0)
00226          {
00227          log_info(0,"Threshold is set to: 20.");
00228             typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<maxvector<Edge>, 20> > TRIE;
00229             RoutingSelector<TRIE, LEAF, LEAF_ALLOCATOR, T_R, DF_D>( 
00230                min_supp, routing, input_file, nr_of_transactions, 
00231                freq_items_with_counters, tr_reader, df_decoder );
00232          }
00233          else if(strcmp(threshold, "25") ==0)
00234          {
00235          log_info(0,"Threshold is set to: 25.");
00236             typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<maxvector<Edge>, 25> > TRIE;
00237             RoutingSelector<TRIE, LEAF, LEAF_ALLOCATOR, T_R, DF_D>( 
00238                min_supp, routing, input_file, nr_of_transactions, 
00239                freq_items_with_counters, tr_reader, df_decoder );
00240          }
00241          else if(strcmp(threshold, "30") ==0)
00242          {
00243          log_info(0,"Threshold is set to: 30.");
00244             typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<maxvector<Edge>, 30> > TRIE;
00245             RoutingSelector<TRIE, LEAF, LEAF_ALLOCATOR, T_R, DF_D>( 
00246                min_supp, routing, input_file, nr_of_transactions, 
00247                freq_items_with_counters, tr_reader, df_decoder );
00248          }
00249          else if(strcmp(threshold, "100") ==0)
00250          {
00251          log_info(0,"Threshold is set to: 100.");
00252             typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<maxvector<Edge>, 100> > TRIE;
00253             RoutingSelector<TRIE, LEAF, LEAF_ALLOCATOR, T_R, DF_D>( 
00254                min_supp, routing, input_file, nr_of_transactions, 
00255                freq_items_with_counters, tr_reader, df_decoder );
00256                }
00257                }*/
00258       else
00259       {
00260          usage();
00261          log_err(0,"edge_repr should be either ordered_list, offsetindex!");
00262          return 1;
00263       }
00264    }
00265    catch (std::ios_base::failure e)
00266    {
00267       log_err(0,"Exiting the program due to IO exception");
00268       return 1;
00269    }
00270 }
00271 
00272 

Generated on Sun Sep 17 17:50:39 2006 for FIM environment by  doxygen 1.4.4