00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "Apriori_Trie.hpp"
00011 #include <cstdlib>
00012 #include <algorithm>
00013 #include <iostream>
00014
00019 Apriori_Trie::Apriori_Trie(const unsigned long counter_of_emptyset ):
00020 main_trie(counter_of_emptyset)
00021 {
00022 }
00023
00028 void Apriori_Trie::insert_frequent_items(
00029 const vector<unsigned long>& counters )
00030 {
00031 for(vector<unsigned long>::size_type item_index = 0;
00032 item_index < counters.size(); item_index++)
00033 main_trie.add_empty_state( item_index, counters[item_index] );
00034 if( !main_trie.edgevector.empty() ) main_trie.maxpath = 1;
00035 }
00036
00041 void Apriori_Trie::candidate_generation( const itemtype& frequent_size )
00042 {
00043 if( frequent_size == 1 ) candidate_generation_two();
00044 else if( main_trie.maxpath == frequent_size )
00045 {
00046 set<itemtype> maybe_candidate;
00047 candidate_generation_assist( &main_trie, frequent_size-1,
00048 maybe_candidate );
00049 }
00050 }
00051
00059 void Apriori_Trie::find_candidate( const vector<itemtype>& basket,
00060 const itemtype candidate_size,
00061 const unsigned long counter_incr)
00062 {
00063 if( candidate_size != 2 )
00064 if ( candidate_size<basket.size()+1 )
00065 main_trie.find_candidate( basket.end()-candidate_size+1, candidate_size,
00066 basket.begin(), counter_incr );
00067 else;
00068 else find_candidate_two( basket, counter_incr );
00069 }
00070
00075 void Apriori_Trie::delete_infrequent( const double min_occurrence,
00076 const itemtype candidate_size )
00077 {
00078 if( candidate_size != 2 )
00079 main_trie.delete_infrequent( min_occurrence, candidate_size - 1 );
00080 else delete_infrequent_two( min_occurrence );
00081 }
00082
00087 void Apriori_Trie::association(
00088 const double min_conf, Input_Output_Manager& input_output_manager ) const
00089 {
00090 input_output_manager << "\nAssociation rules:\ncondition ==>";
00091 input_output_manager << "consequence (confidence, occurrence)\n";
00092 set<itemtype> consequence_part;
00093 assoc_rule_assist( min_conf, &main_trie,
00094 consequence_part, input_output_manager );
00095 }
00096
00097 itemtype Apriori_Trie::longest_path() const
00098 {
00099 return main_trie.maxpath;
00100 }
00101
00102 void Apriori_Trie::write_content_to_file(
00103 Input_Output_Manager& input_output_manager ) const
00104 {
00105 input_output_manager<< "Frequent 0-itemsets:\nitemset (occurrence)\n";
00106 input_output_manager<< "{} ("<< main_trie.counter << ")\n";
00107 for( itemtype item_size = 1; item_size < main_trie.maxpath+1; item_size++ )
00108 {
00109 input_output_manager<< "Frequent " << item_size;
00110 input_output_manager << "-itemsets:\nitemset (occurrence)\n";
00111 set<itemtype> frequent_itemset;
00112 write_content_to_file_assist( input_output_manager,
00113 &main_trie, item_size, frequent_itemset );
00114 }
00115 }
00116
00117 void Apriori_Trie::show_content_preorder( ) const
00118 {
00119 main_trie.show_content_preorder( );
00120 }
00121
00122
00123 Apriori_Trie::~Apriori_Trie()
00124 {
00125 }
00126
00131 bool Apriori_Trie::is_all_subset_frequent(
00132 const set<itemtype>& maybe_candidate ) const
00133 {
00134 if( maybe_candidate.size() < 3) return true;
00135
00136 else
00137 {
00138 set<itemtype> temp_itemset(maybe_candidate);
00139 set<itemtype>::const_iterator item_it = --(--maybe_candidate.end());
00140 do
00141 {
00142 item_it--;
00143 temp_itemset.erase( *item_it );
00144 if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) )
00145 return false;
00146 temp_itemset.insert( *item_it );
00147 }
00148 while ( item_it != maybe_candidate.begin() );
00149 return true;
00150 }
00151 }
00152
00153 void Apriori_Trie::candidate_generation_two()
00154 {
00155 if( !main_trie.edgevector.empty() )
00156 {
00157 main_trie.maxpath = 2;
00158 temp_counter_array.reserve(main_trie.edgevector.size()-1);
00159 temp_counter_array.resize(main_trie.edgevector.size()-1);
00160 for( vector<Edge>::size_type stateIndex = 0;
00161 stateIndex < main_trie.edgevector.size()-1; stateIndex++ )
00162 {
00163 temp_counter_array[stateIndex].reserve(
00164 main_trie.edgevector.size()-1-stateIndex );
00165 temp_counter_array[stateIndex].resize(
00166 main_trie.edgevector.size()-1-stateIndex, 0);
00167 }
00168 }
00169 }
00170
00171 void Apriori_Trie::candidate_generation_assist(
00172 Trie* trie,
00173 const itemtype distance_from_generator,
00174 set<itemtype>& maybe_candidate)
00175 {
00176 itemtype temp_maxpath = trie->maxpath;
00177 vector<Edge>::iterator itEdge = trie->edgevector.begin();
00178 if( distance_from_generator )
00179 {
00180 for( ; itEdge != trie->edgevector.end(); itEdge++ )
00181 if( (*itEdge).subtrie->maxpath + 1 >= distance_from_generator )
00182 {
00183 maybe_candidate.insert((*itEdge).label);
00184 candidate_generation_assist(
00185 (*itEdge).subtrie, distance_from_generator - 1, maybe_candidate );
00186 maybe_candidate.erase((*itEdge).label);
00187 if( temp_maxpath < (*itEdge).subtrie->maxpath + 1 )
00188 temp_maxpath = (*itEdge).subtrie->maxpath + 1;
00189 }
00190 if( trie->maxpath < temp_maxpath )
00191 trie->maxpath = temp_maxpath;
00192 }
00193 else
00194 {
00195 vector<Edge>::iterator itEdge2;
00196 Trie* toExtend;
00197 for( ; itEdge != trie->edgevector.end(); itEdge++ )
00198 {
00199 maybe_candidate.insert((*itEdge).label);
00200 toExtend = (*itEdge).subtrie;
00201 for( itEdge2 = itEdge + 1;
00202 itEdge2 != trie->edgevector.end(); itEdge2++ )
00203 {
00204 maybe_candidate.insert( (*itEdge2).label );
00205 if( is_all_subset_frequent( maybe_candidate) )
00206 toExtend->add_empty_state( (*itEdge2).label );
00207 maybe_candidate.erase( (*itEdge2).label );
00208 }
00209 if( !toExtend->edgevector.empty())
00210 {
00211 toExtend->maxpath = 1;
00212 trie->maxpath=2;
00213 }
00214
00215 (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector);
00216 maybe_candidate.erase((*itEdge).label);
00217 }
00218
00219 }
00220 }
00221
00228 void Apriori_Trie::find_candidate_two( const vector<itemtype>& basket,
00229 const unsigned long counter )
00230 {
00231 if( basket.size() > 1)
00232 {
00233 vector<itemtype>::const_iterator it1_basket,
00234 it2_basket;
00235
00236 for( it1_basket = basket.begin(); it1_basket != basket.end()-1;
00237 it1_basket++)
00238 for( it2_basket = it1_basket+1; it2_basket != basket.end();
00239 it2_basket++)
00240 temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1]
00241 += counter;
00242 }
00243 }
00244
00248 void Apriori_Trie::delete_infrequent_two( const double min_occurrence )
00249 {
00250 vector<Edge>::size_type stateIndex_1,
00251 stateIndex_2;
00252 for( stateIndex_1 = 0; stateIndex_1 < main_trie.edgevector.size()-1;
00253 stateIndex_1++ )
00254 {
00255 for( stateIndex_2 = 0;
00256 stateIndex_2 < main_trie.edgevector.size() - 1 - stateIndex_1;
00257 stateIndex_2++ )
00258 {
00259 if( temp_counter_array[stateIndex_1][stateIndex_2] > min_occurrence )
00260 main_trie.edgevector[stateIndex_1].subtrie->add_empty_state(
00261 stateIndex_1 + stateIndex_2 + 1,
00262 temp_counter_array[stateIndex_1][stateIndex_2] );
00263 }
00264 if( !main_trie.edgevector[stateIndex_1].subtrie->edgevector.empty() )
00265 {
00266 main_trie.edgevector[stateIndex_1].subtrie->maxpath = 1;
00267 main_trie.maxpath = 2;
00268 }
00269 temp_counter_array[stateIndex_1].clear();
00271 vector<unsigned long>().swap(temp_counter_array[stateIndex_1]);
00272 }
00273 temp_counter_array.clear();
00275 vector< vector<unsigned long> >().swap(temp_counter_array);
00276 }
00277
00278 void Apriori_Trie::assoc_rule_find(
00279 const double min_conf, set<itemtype>& condition_part,
00280 set<itemtype>& consequence_part, const unsigned long union_support,
00281 Input_Output_Manager& input_output_manager ) const
00282 {
00283 itemtype item;
00284 for( set<itemtype>::const_iterator item_it = consequence_part.begin();
00285 item_it != consequence_part.end(); item_it++)
00286 if( condition_part.empty() || *(--condition_part.end()) < *item_it)
00287 {
00288 item = *item_it;
00289 consequence_part.erase( item );
00290 condition_part.insert( item );
00291 if( union_support > main_trie.is_included(
00292 condition_part, condition_part.begin() )->counter * min_conf )
00293 {
00294 input_output_manager<< '\n';
00295 input_output_manager.write_out_basket(condition_part);
00296 input_output_manager<< "==> ";
00297 input_output_manager.write_out_basket(consequence_part);
00298 input_output_manager<< "("<<((double) union_support)
00299 / main_trie.is_included(condition_part,
00300 condition_part.begin())->counter;
00301 input_output_manager<< ", " << union_support << ')';
00302 }
00303 else if( consequence_part.size() > 1 )
00304 assoc_rule_find( min_conf, condition_part,
00305 consequence_part, union_support,
00306 input_output_manager );
00307 item_it = (consequence_part.insert( item )).first;
00308 condition_part.erase( item );
00309 }
00310 }
00311
00312 void Apriori_Trie::assoc_rule_assist(
00313 const double min_conf, const Trie* trie,
00314 set<itemtype>& consequence_part,
00315 Input_Output_Manager& input_output_manager) const
00316 {
00317 if( consequence_part.size() > 1 )
00318 {
00319 set<itemtype> condition_part;
00320 assoc_rule_find( min_conf, condition_part, consequence_part,
00321 trie->counter, input_output_manager );
00322 }
00323 for( vector<Edge>::const_iterator it_item = trie->edgevector.begin();
00324 it_item != trie->edgevector.end(); it_item++)
00325 {
00326 consequence_part.insert( (*it_item).label );
00327 assoc_rule_assist( min_conf, (*it_item).subtrie, consequence_part,
00328 input_output_manager);
00329 consequence_part.erase( (*it_item).label );
00330 }
00331 }
00332
00333
00334 void Apriori_Trie::write_content_to_file_assist(
00335 Input_Output_Manager& input_output_manager, const Trie* trie,
00336 const itemtype distance_from_frequent,
00337 set<itemtype>& frequent_itemset ) const
00338 {
00339 if( distance_from_frequent )
00340 {
00341 for( vector<Edge>::const_iterator it = trie->edgevector.begin();
00342 it != trie->edgevector.end(); it++ )
00343 if( (*it).subtrie->maxpath + 1 >= distance_from_frequent )
00344 {
00345 frequent_itemset.insert( (*it).label );
00346 write_content_to_file_assist( input_output_manager,
00347 (*it).subtrie,
00348 distance_from_frequent -1,
00349 frequent_itemset );
00350 frequent_itemset.erase( (*it).label );
00351 }
00352 }
00353 else
00354 input_output_manager.write_out_basket_and_counter( frequent_itemset,
00355 trie->counter );
00356 }
00357