Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

findLeafRepresentation.cpp

Go to the documentation of this file.
00001 #include "common.hpp"
00002 #include "io/input/transaction_reader/brBufferedTransactionReader.hpp"
00003 #include "io/input/transaction_reader/SortedTransactionReader.hpp"
00004 
00005 #include "io/output/BufferedOutput.hpp"
00006 #include "io/codec/decoder/df/DFDecoderWithEEManagement.hpp"
00007 
00008 #include "util/StreamParser.hpp"
00009 #include "util/FrequentFilter.cpp"
00010 
00011 #include "datastructures/maxvector.hpp"
00012 
00013 #include "test/apriori/bodon/LeafRepresentationSelector.hpp"
00014 
00015 #include "datastructures/trie/edgelist/OrderedEdgelist.hpp"
00016 #include "apriori/bodon/Trie.hpp"
00017 #include "apriori/bodon/TrieNEE.hpp"
00018 
00019 #include <vector>
00020 #include <iostream>
00021 #include <string>
00022 
00023 
/// Help text describing the expected layout of the transaction file.
/// Stays empty until init() has been called.
std::string file_format;

/**
 * Fills the global file_format string with the description of the
 * transaction file format.
 *
 * Calling it again simply overwrites the string with the same text.
 */
void init()
{
   // Adjacent string literals are merged by the compiler, so the whole text
   // is assigned in one go instead of being appended piece by piece.
   file_format =
      "File format:"
      "\n\nThe transactionfile is a plain text file. Each row "
      "represents a transaction. \n"
      "A transaction is a set of items separated by a nonnumeric "
      "character.\nIt can be for example a white space, comma, "
      "colon, etc.\n"
      "Items are nonnegative integers.\n";
}
00037 void usage()
00038 {
00039    std::cerr << "\nUsage: findLeafRepresentation leaf_type transactionfile min_supp ";
00040    std::cerr << "outcomefile [options]\n";
00041    std::cerr << "\n leaf_tyep\t  the type of the leaf, i.e: homo,";
00042    std::cerr << " inhomo_new, inhomo_alloc_1K, inhomo_alloc_8K, inhomo_alloc_64K, inhomo_alloc_512K \n ";
00043    std::cerr << "\n transactionfile  file, that contains the transactions of items";
00044    std::cerr << "\n outcomefile\t  file to write the outcome";
00045    std::cerr << "\n min_supp\t  absolute support threshold";
00046 
00047    std::cerr << file_format;
00048    std::cerr << "\n\nHave a succesful mining ;-)"<<std::endl<<std::endl;
00049 }
00050 
00062 int process_arguments( int argc, char *argv[],
00063                        counter_t& min_supp, bool &isrel, double &relminsupp )
00064 {
00065    if ( argc < 5 )
00066    {
00067       usage();
00068       log_err(0,"There are 5 mandatory arguments.");
00069       return 2;
00070    }
00071    std::string mins=argv[3];
00072    if (mins[mins.size()-1]=='%') {
00073       mins.erase(mins.size()-1);
00074       isrel=true;
00075       relminsupp=atof(mins.c_str());
00076       relminsupp/=100;
00077       log_info(0,"Using relative minimum support of %lg",relminsupp);
00078       return 0;
00079    }
00080    isrel=false;
00081 
00082    int min_supp_i;
00083    try
00084    {
00085       convert(argv[3], min_supp_i);
00086       if ( min_supp_i <= 0  )
00087       {
00088          log_err(0,"%s cannot be converted to a positive integer.",argv[3]);
00089          return 3;
00090       }
00091    }
00092    catch(BadConversion e)
00093    {
00094       log_err(0,"min_supp conversion problem.");
00095       return 3;
00096    }
00097    min_supp = static_cast<counter_t>(min_supp_i);
00098    log_info(0,"min_supp is set to %d", min_supp);
00099    return 0;
00100 }
00101 
00102 int main( int argc, char *argv[] )
00103 {
00104    init();
00105    counter_t min_supp;
00106    bool relative;
00107    double relminsupp;
00108       
00109    {
00110       int return_val = 
00111          process_arguments( argc, argv, min_supp, relative, relminsupp );
00112       if(return_val)
00113          return return_val;
00114    }
00115 
00116    char* leaf_repr = argv[1];
00117    char* input_file = argv[2];
00118    char* output_file = argv[4];
00119 
00120    try
00121    {
00122       typedef brBufferedTransactionReader< > T_R;
00123       T_R::params_t par_i;
00124       par_i.file_name = input_file;
00125       par_i.mode=FileReprBase::READ;
00126       par_i.file_buffer_size = 16 * 1024;
00127       T_R tr_reader(&par_i);
00128       std::vector< std::pair<counter_t, item_t> > freq_items_with_counters;
00129       counter_t nr_of_transactions;
00130       FrequentFilter<T_R>
00131          fr_filter(tr_reader);
00132       log_status(0,"Finding frequent items.");
00133       fr_filter.findFrequentItems( freq_items_with_counters,   
00134                                    nr_of_transactions, min_supp);
00135 
00136       log_status(0,"Doing decoder.");
00137       typedef DFDecoderWithEEManagement< > DF_D;
00138 
00139       DF_D::params_t par_d;
00140       par_d.file_name = output_file;
00141       par_d.mode=FileReprBase::WRITE;
00142       DF_D df_decoder(&par_d);
00143 
00144       typedef Bodon::LeafWithoutConstructor LEAF_WC;     
00145       typedef Bodon::Leaf LEAF;  
00146       typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelist<> > TRIE_BASE;
00147       typedef Bodon::TrieNEE< TRIE_BASE > TRIE;
00148 /*      if(strcmp(leaf_repr,"homo_old"))
00149       {
00150          log_info(0,"homo_old representation is selected pruning is selected.");
00151          LeafRepresentationSelectorOld<TRIE, LEAF_WC, T_R, DF_D>( 
00152             min_supp, input_file, nr_of_transactions, 
00153             freq_items_with_counters, tr_reader, df_decoder);
00154       }
00155       else */
00156       if(!strcmp(leaf_repr,"homo"))
00157       {
00158          log_info(0,"homo representation is selected pruning is selected.");
00159          typedef NewWrapperAlloc<TRIE> LEAF_ALLOCATOR;
00160          LeafRepresentationSelector<TRIE, TRIE, LEAF_ALLOCATOR, T_R, DF_D>( 
00161             min_supp, input_file, nr_of_transactions, 
00162             freq_items_with_counters, tr_reader, df_decoder);
00163       }
00164       else if(!strcmp(leaf_repr,"inhomo_new"))
00165       {
00166          log_info(0,"inhomo_new representation is selected pruning is selected.");
00167          typedef NewWrapperAlloc<LEAF> LEAF_ALLOCATOR;
00168          LeafRepresentationSelector<TRIE, LEAF, LEAF_ALLOCATOR, T_R, DF_D>( 
00169             min_supp, input_file, nr_of_transactions, 
00170             freq_items_with_counters, tr_reader, df_decoder);
00171       }
00172       else if(!strcmp(leaf_repr,"inhomo_alloc_1K"))
00173       {
00174          log_info(0,"inhomo_alloc_1K representation is selected pruning is selected.");
00175          typedef bracz::singleualloc<LEAF_WC, 1024> LEAF_ALLOCATOR;
00176          LeafRepresentationSelector<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>( 
00177             min_supp, input_file, nr_of_transactions, 
00178             freq_items_with_counters, tr_reader, df_decoder);
00179       }
00180       else if(!strcmp(leaf_repr,"inhomo_alloc_8K"))
00181       {
00182          log_info(0,"inhomo_alloc_8K representation is selected pruning is selected.");
00183          typedef bracz::singleualloc<LEAF_WC, 8196> LEAF_ALLOCATOR;
00184          LeafRepresentationSelector<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>( 
00185             min_supp, input_file, nr_of_transactions, 
00186             freq_items_with_counters, tr_reader, df_decoder);
00187       }
00188       else if(!strcmp(leaf_repr,"inhomo_alloc_64K"))
00189       {
00190          log_info(0,"inhomo_alloc_64K representation is selected pruning is selected.");
00191          typedef bracz::singleualloc<LEAF_WC, 65536> LEAF_ALLOCATOR;
00192          LeafRepresentationSelector<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>( 
00193             min_supp, input_file, nr_of_transactions, 
00194             freq_items_with_counters, tr_reader, df_decoder);
00195       }
00196       else if(!strcmp(leaf_repr,"inhomo_alloc_512K"))
00197       {
00198          log_info(0,"inhomo_alloc_512K representation is selected pruning is selected.");
00199          typedef bracz::singleualloc<LEAF_WC, 524288> LEAF_ALLOCATOR;
00200          LeafRepresentationSelector<TRIE, LEAF_WC, LEAF_ALLOCATOR, T_R, DF_D>( 
00201             min_supp, input_file, nr_of_transactions, 
00202             freq_items_with_counters, tr_reader, df_decoder);
00203       }
00204       else
00205       {
00206          usage();
00207          log_err(0,"leaf_type should be either homo, inhomo_new, inhomo_alloc_1K, , inhomo_alloc_8K, inhomo_alloc_64K, inhomo_alloc_512K!");
00208          return 1;
00209       }
00210    }
00211    catch (std::ios_base::failure e)
00212    {
00213       log_err(0,"Exiting the program due to IO exception");
00214       return 1;
00215    }
00216 }
00217 
00218 

Generated on Sun Sep 17 17:50:38 2006 for FIM environment by  doxygen 1.4.4