Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

findThreshold.cpp

Go to the documentation of this file.
00001 #include "common.hpp"
00002 #include "io/input/transaction_reader/brBufferedTransactionReader.hpp"
00003 #include "io/input/transaction_reader/SortedTransactionReader.hpp"
00004 
00005 #include "io/output/BufferedOutput.hpp"
00006 #include "io/codec/decoder/df/CacheDFDecoder.hpp"
00007 
00008 #include "util/StreamParser.hpp"
00009 #include "util/FrequentFilter.cpp"
00010 
00011 #include "datastructures/maxvector.hpp"
00012 
00013 #include "test/apriori/bodon/ThresholdSelector.hpp"
00014 
00015 #include <vector>
00016 #include <iostream>
00017 #include <string>
00018 
00019 
/// Help text describing the expected layout of the transaction file.
/// It is empty until init() has been called; usage() streams it to std::cerr.
std::string file_format;

/**
 * Fills the global file_format string with the description of the
 * transaction-file format.
 *
 * The text is assigned (not appended), so calling init() more than once
 * leaves file_format with the same content.
 */
void init()
{
   // Adjacent string literals are concatenated by the compiler, so the whole
   // help text is built with a single assignment.
   file_format =
      "File format:"
      "\n\nThe transactionfile is a plain text file. Each row "
      "represents a transaction. \n"
      "A transaction is a set of items separated by a nonnumeric "
      "character.\nIt can be for example a white space, comma, "
      "colon, etc.\n"
      "Items are nonnegative integers.\n";
}
00033 void usage()
00034 {
00035    std::cerr << "\nUsage: findThreshold threshold vector_repr transactionfile min_supp ";
00036    std::cerr << "outcomefile [options]\n";
00037    std::cerr << "\n threshold\t  the threshold in command find, i.e: ";
00038    std::cerr << " the number of children above binary search is applied\n ";
00039    std::cerr << " vector_repr\t  the representation of the vector, i.e: ";
00040    std::cerr << "maxvector, stl::vector, \n";
00041    std::cerr << "\n transactionfile  file, that contains the transactions of items";
00042    std::cerr << "\n outcomefile\t  file to write the outcome";
00043    std::cerr << "\n min_supp\t  absolute support threshold";
00044 
00045    std::cerr << file_format;
00046    std::cerr << "\n\nHave a succesful mining ;-)"<<std::endl<<std::endl;
00047 }
00048 
00060 int process_arguments( int argc, char *argv[],
00061                        counter_t& min_supp, bool &isrel, double &relminsupp )
00062 {
00063    if ( argc < 6 )
00064    {
00065       usage();
00066       log_err(0,"There are 5 mandatory arguments.");
00067       return 2;
00068    }
00069    std::string mins=argv[4];
00070    if (mins[mins.size()-1]=='%') {
00071       mins.erase(mins.size()-1);
00072       isrel=true;
00073       relminsupp=atof(mins.c_str());
00074       relminsupp/=100;
00075       log_info(0,"Using relative minimum support of %lg",relminsupp);
00076       return 0;
00077    }
00078    isrel=false;
00079 
00080    int min_supp_i;
00081    try
00082    {
00083       convert(argv[4], min_supp_i);
00084       if ( min_supp_i <= 0  )
00085       {
00086          log_err(0,"%s cannot be converted to a positive integer.",argv[3]);
00087          return 3;
00088       }
00089    }
00090    catch(BadConversion e)
00091    {
00092       log_err(0,"min_supp conversion problem.");
00093       return 3;
00094    }
00095    min_supp = static_cast<counter_t>(min_supp_i);
00096    log_info(0,"min_supp is set to %d", min_supp);
00097    return 0;
00098 }
00099 
00100 int main( int argc, char *argv[] )
00101 {
00102    init();
00103    counter_t min_supp;
00104    bool relative;
00105    double relminsupp;
00106       
00107    {
00108       int return_val = 
00109          process_arguments( argc, argv, min_supp, relative, relminsupp );
00110       if(return_val)
00111          return return_val;
00112    }
00113 
00114    char* threshold = argv[1];
00115    char* input_file = argv[3];
00116    char* output_file = argv[5];
00117 
00118    try
00119    {
00120       // We assume that the transactions does not contain duplicates!!!
00121       typedef brBufferedTransactionReader< > T_R;
00122       // Otherwise uncmment this:
00123       // typedef SortedTransactionReader<brBufferedTransactionReader< >, true> T_R;
00124 
00125       T_R::params_t par_i;
00126       par_i.file_name = input_file;
00127       par_i.mode=FileReprBase::READ;
00128       par_i.file_buffer_size = 16 * 1024;
00129       T_R tr_reader(&par_i);
00130       std::vector< std::pair<counter_t, item_t> > freq_items_with_counters;
00131       counter_t nr_of_transactions;
00132       // The first step of each algorithms is determining the frequent items.
00133       FrequentFilter<T_R>
00134          fr_filter(tr_reader);
00135       log_status(0,"Finding frequent items.");
00136       fr_filter.findFrequentItems( freq_items_with_counters,   
00137                                    nr_of_transactions, min_supp);
00138 
00139       log_status(0,"Doing decoder.");
00140       typedef CacheDFDecoder< OutputBase<FDRepr> > DF_D;
00141 
00142       DF_D::params_t par_d;
00143       par_d.file_name = output_file;
00144       par_d.mode=FileReprBase::WRITE;
00145       DF_D df_decoder(&par_d);
00146 
00147       if(strcmp(argv[2],"maxvector")==0)
00148       {
00149          log_info(0,"maxvector vector representation is selected");
00150          ThresholdSelector<maxvector<Edge>, T_R, DF_D>( threshold, min_supp, input_file, nr_of_transactions, 
00151                                                         freq_items_with_counters, tr_reader, df_decoder);
00152       }
00153       else if(strcmp(argv[2],"std::vector")==0)
00154       {
00155          log_info(0,"std::vector vector representation is selected");
00156          ThresholdSelector<std::vector<Edge>, T_R, DF_D>( threshold, min_supp, input_file, nr_of_transactions, 
00157                                                           freq_items_with_counters, tr_reader, df_decoder);
00158       }
00159       else
00160       {
00161          usage();
00162          log_err(0,"edge_repr should be either ordered_list, offsetindex!");
00163          return 1;
00164       }
00165    }
00166    catch (std::ios_base::failure e)
00167    {
00168       log_err(0,"Exiting the program due to IO exception");
00169       return 1;
00170    }
00171 }
00172 
00173 

Generated on Sun Sep 17 17:50:38 2006 for FIM environment by  doxygen 1.4.4