00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "Apriori.hpp"
00010 #include <iostream>
00011 #include <vector>
00012 #include <set>
00013 #include <cmath>
00014
00015 using namespace std;
00016
00021 void Apriori::support( const itemtype& candidate_size )
00022 {
00023 set<itemtype> basket;
00024 vector<itemtype> basket_v;
00025 if( store_input )
00026 {
00027 if (candidate_size == 2)
00028 {
00029 while( input_output_manager.read_in_a_line( basket ) )
00030 {
00031 input_output_manager.basket_recode( basket, basket_v );
00032 if (basket_v.size()>1) reduced_baskets[basket_v]++;
00033 }
00034 }
00035 for (map<vector<itemtype>,unsigned long>::iterator it =
00036 reduced_baskets.begin(); it!=reduced_baskets.end();it++)
00037 apriori_trie->find_candidate(it->first,candidate_size,it->second);
00038 }
00039 else while( input_output_manager.read_in_a_line( basket ) )
00040 {
00041 input_output_manager.basket_recode(basket, basket_v);
00042 apriori_trie->find_candidate(basket_v,candidate_size);
00043 }
00044 }
00053 Apriori::Apriori( ifstream& basket_file, const char* output_file_name,
00054 const bool store_input):
00055 input_output_manager(basket_file, output_file_name ),
00056 store_input(store_input)
00057 {
00058 }
00059
00069 void Apriori::APRIORI_alg( const double min_supp, const double min_conf,
00070 const bool quiet,
00071 const unsigned long size_threshold )
00072 {
00073 unsigned long basket_number;
00074 if(!quiet) cout<<endl<<"\t\tFinding frequent itemsets..."<<endl<<endl;
00075 itemtype candidate_size=1;
00076 itemtype longest_path,longest_path_after_delete=1;
00077 if(!quiet)
00078 {
00079 cout<<endl<<"Determining the support of the items";
00080 cout<<" and deleting infrequent ones!"<<endl;
00081 }
00082 vector<unsigned long> support_of_items;
00083 basket_number = input_output_manager.find_frequent_items(
00084 min_supp, support_of_items );
00085 apriori_trie = new Apriori_Trie( basket_number );
00086 apriori_trie->insert_frequent_items( support_of_items );
00087
00088
00089
00090 double min_supp_abs = min_supp * basket_number;
00091 longest_path_after_delete = apriori_trie->longest_path();
00092
00093
00094 longest_path=apriori_trie->longest_path();
00095 candidate_size++;
00096 if(!quiet)
00097 {
00098 cout<<endl<<"Genarating "<<candidate_size;
00099 cout<<"-itemset candidates!"<<endl;
00100 }
00101 apriori_trie->candidate_generation(candidate_size-1);
00102
00103
00104 while( longest_path<apriori_trie->longest_path() )
00105 {
00106 input_output_manager.rewind();
00107 if(!quiet)
00108 {
00109 cout<<"Determining the support of the "<<candidate_size;
00110 cout<<"-itemset candidates!"<<endl;
00111 }
00112 support( candidate_size );
00113
00114
00115 if(!quiet) cout<<"Deleting infrequent itemsets!"<<endl;
00116 apriori_trie->delete_infrequent(min_supp_abs, candidate_size);
00117 longest_path_after_delete=apriori_trie->longest_path();
00118
00119
00120 if (candidate_size == size_threshold )
00121 {
00122 if(!quiet) cout<<"Size threshold is reached!"<<endl;
00123 break;
00124 }
00125 longest_path=apriori_trie->longest_path();
00126 candidate_size++;
00127 if( !quiet )
00128 {
00129 cout<<endl<<"Genarating "<<candidate_size;
00130 cout<<"-itemset candidates!"<<endl;
00131 }
00132 apriori_trie->candidate_generation(candidate_size-1);
00133
00134
00135 }
00136 apriori_trie->write_content_to_file( input_output_manager );
00137 if (min_conf)
00138 {
00139 if(!quiet) cout<<"\nGenerating association rules...!\n";
00140 apriori_trie->association( min_conf, input_output_manager );
00141 }
00142 if(!quiet) cout<<"\nMining is done!\n";
00143 }
00144
00145 Apriori::~Apriori()
00146 {
00147 delete apriori_trie;
00148 }