Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

ThresholdSelector.hpp

Go to the documentation of this file.
00001 #ifndef ThresholdSelector_HPP
00002 #define ThresholdSelector_HPP
00003 
00004 #include "io/input/transaction_reader/SortedTransactionReader.hpp"
00005 #include "io/codec/coder/Coder.hpp"
00006 #include "io/db_cache/BuildTreeDBCache.hpp"
00007 #include "util/Frequent2Filter.cpp"
00008 
00009 #include "util/StreamParser.hpp"
00010 
00011 #include "datastructures/trie/edgelist/OrderedEdgelist.hpp"
00012 #include "datastructures/trie/edgelist/OrderedEdgelistDynLookup.hpp"
00013 #include "apriori/bodon/Trie.hpp"
00014 
00015 #include "apriori/bodon/trie/trie_manipulators/FrequentItemInserter.hpp"
00016 #include "apriori/bodon/trie/trie_manipulators/FrequentPairInserter.hpp"
00017 #include "apriori/bodon/trie/trie_manipulators/support_counter/SupportCounterMerge.hpp"
00018 #include "apriori/OneByOneSupportCounter.hpp"
00019 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/CandidateGeneratorPrune.hpp"
00020 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/IntersectProPruner.hpp"
00021 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/InfreqRemover.hpp"
00022 #include "apriori/Apriori.hpp"
00023 
00024 
00025 template <class VECTOR, class T_R, class DF_D> 
00026 class ThresholdSelector
00027 {
00028    public:
00029       ThresholdSelector( char* threshold,
00030          counter_t min_supp, char* input_file,
00031          counter_t nr_of_transactions, 
00032          std::vector< std::pair<counter_t, item_t> >& 
00033          freq_items_with_counters, 
00034          T_R& tr_reader, DF_D& df_decoder);
00035 
00036    private:
00037       std::vector< std::pair<counter_t, std::pair<item_t, item_t> > >
00038       freq_pairs_with_counters;
00039 };
00040 
00041 template <class VECTOR, class T_R, class DF_D>
00042 ThresholdSelector<VECTOR, T_R, DF_D>::ThresholdSelector( char* threshold,
00043    counter_t min_supp, char* input_file,
00044    counter_t nr_of_transactions, 
00045    std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters, 
00046    T_R& tr_reader, DF_D& df_decoder)
00047 {
00048    typedef typename bracz::BuildTreeDBCache< SortedTransactionReader<Coder<T_R, DF_D>, false>,  
00049       std::vector<item_t>, bracz::EndPatriciaBuildTree<true> > S_C;
00050 
00051    typename S_C::params_t par_c;
00052    par_c.file_name = input_file;
00053    par_c.mode=FileReprBase::READ;
00054    par_c.largest_item = tr_reader.getLargestItem();
00055    par_c.decoder = &df_decoder;
00056    par_c.freq_items_with_counters = &freq_items_with_counters;
00057    par_c.codemode = ASC;
00058    log_status(0,"Doing sorted codec.");
00059    S_C sorted_coder(&par_c);
00060 
00061 
00062    Frequent2Filter<S_C> fr_2_filter(
00063       &sorted_coder );
00064    log_status(0,"Finding frequent pairs.")
00065       fr_2_filter.findFrequentPairs(freq_pairs_with_counters, min_supp);
00066 
00067    typedef Bodon::LeafWithoutConstructor LEAF_WC;        
00068    typedef Bodon::Leaf LEAF;     
00069    typedef bracz::singleualloc<LEAF_WC, 1024> LEAF_ALLOCATOR;
00070    LEAF_ALLOCATOR s_alloc;
00071    const NEELevel NEE = NEE_Off;
00072 
00073    if( strcmp(threshold,"3") == 0 )
00074    {
00075       log_info(0,"Threshol is set to 3.");
00076       typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 3> > TRIE;
00077       TRIE main_trie;
00078       typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00079       FII fii(main_trie, df_decoder);
00080       typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00081       typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00082       typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00083       typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00084       IR infrequent_remover(main_trie, df_decoder, s_alloc);
00085       typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00086       typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00087       typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00088       A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00089       log_status(0,"Finding frequent itemsets.");
00090       apriori.findFrequentItemsets( 
00091          nr_of_transactions, *par_c.freq_counters,
00092          freq_pairs_with_counters, min_supp );
00093    }
00094    else if( strcmp(threshold,"6") == 0 )
00095    {
00096       log_info(0,"Threshol is set to 6.");
00097       typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 6> > TRIE;
00098       TRIE main_trie;
00099       typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00100       FII fii(main_trie, df_decoder);
00101       typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00102       typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00103       typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00104       typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00105       IR infrequent_remover(main_trie, df_decoder, s_alloc);
00106       typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00107       typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00108       typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00109       A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00110       log_status(0,"Finding frequent itemsets.");
00111       apriori.findFrequentItemsets( 
00112          nr_of_transactions, *par_c.freq_counters,
00113          freq_pairs_with_counters, min_supp );
00114    }
00115    else if( strcmp(threshold,"10") == 0 )
00116    {
00117       log_info(0,"Threshol is set to 10.");
00118       typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 10> > TRIE;
00119       TRIE main_trie;
00120       typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00121       FII fii(main_trie, df_decoder);
00122       typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00123       typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00124       typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00125       typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00126       IR infrequent_remover(main_trie, df_decoder, s_alloc);
00127       typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00128       typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00129       typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00130       A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00131       log_status(0,"Finding frequent itemsets.");
00132       apriori.findFrequentItemsets( 
00133          nr_of_transactions, *par_c.freq_counters,
00134          freq_pairs_with_counters, min_supp );
00135    }
00136    else if( strcmp(threshold,"20") == 0 )
00137    {
00138       log_info(0,"Threshol is set to 20.");
00139       typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 20> > TRIE;
00140       TRIE main_trie;
00141       typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00142       FII fii(main_trie, df_decoder);
00143       typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00144       typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00145       typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00146       typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00147       IR infrequent_remover(main_trie, df_decoder, s_alloc);
00148       typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00149       typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00150       typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00151       A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00152       log_status(0,"Finding frequent itemsets.");
00153       apriori.findFrequentItemsets( 
00154          nr_of_transactions, *par_c.freq_counters,
00155          freq_pairs_with_counters, min_supp );
00156    }
00157    else if( strcmp(threshold,"40") == 0 )
00158    {
00159       log_info(0,"Threshol is set to 40.");
00160       typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 40> > TRIE;
00161       TRIE main_trie;
00162       typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00163       FII fii(main_trie, df_decoder);
00164       typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00165       typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00166       typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00167       typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00168       IR infrequent_remover(main_trie, df_decoder, s_alloc);
00169       typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00170       typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00171       typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00172       A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00173       log_status(0,"Finding frequent itemsets.");
00174       apriori.findFrequentItemsets( 
00175          nr_of_transactions, *par_c.freq_counters,
00176          freq_pairs_with_counters, min_supp );
00177    }
00178    else if( strcmp(threshold,"100") == 0 )
00179    {
00180       log_info(0,"Threshol is set to 100.");
00181       typedef Bodon::Trie< LEAF, Bodon::OrderedEdgelistDynLookup<VECTOR, 100> > TRIE;
00182       TRIE main_trie;
00183       typedef Bodon::FrequentItemInserter<DF_D, TRIE, NEE> FII;
00184       FII fii(main_trie, df_decoder);
00185       typedef Bodon::FrequentPairInserter<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> FPI;
00186       typedef Bodon::inhomogeneous_trie::IntersectProPruner<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> PRUNER;
00187       typedef Bodon::inhomogeneous_trie::CandidateGeneratorPrune<PRUNER, DF_D, TRIE, LEAF_ALLOCATOR, NEE> CG;
00188       typedef Bodon::inhomogeneous_trie::InfreqRemover<DF_D, TRIE, LEAF_WC, LEAF_ALLOCATOR, NEE> IR;
00189       IR infrequent_remover(main_trie, df_decoder, s_alloc);
00190       typedef Bodon::SupportCounterMerge<TRIE> SUPP_C_BASE;
00191       typedef OneByOneSupportCounter<TRIE, S_C, SUPP_C_BASE> SUPP_C;
00192       typedef Apriori<S_C, DF_D, TRIE, LEAF_ALLOCATOR, FII, FPI, CG, IR, SUPP_C> A;
00193       A apriori(main_trie, s_alloc, infrequent_remover, sorted_coder, df_decoder, fii);
00194       log_status(0,"Finding frequent itemsets.");
00195       apriori.findFrequentItemsets( 
00196          nr_of_transactions, *par_c.freq_counters,
00197          freq_pairs_with_counters, min_supp );
00198    }
00199    else 
00200       log_warn(0,"Unknown threshold value is requested '%s'.", threshold);
00201 }
00202 #endif

Generated on Sun Sep 17 17:50:40 2006 for FIM environment by  doxygen 1.4.4