Main Page | Namespace List | Class List | File List | Class Members | File Members

MSApriori_Trie.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002                           MSApriori_Trie.cpp  -  description
00003                              -------------------
00004     begin                : cs dec 26 2002
00005     copyright            : (C) 2002 by Ferenc Bodon
00006     email                : bodon@mit.bme.hu
00007  ***************************************************************************/
00008 
00009 
00010 #include "MSApriori_Trie.hpp"
00011 #include <cstdlib>
00012 #include <algorithm>
00013 #include <iostream>
00014 
00015 
00020 MSApriori_Trie::MSApriori_Trie(const unsigned long counter_of_emptyset, const vector<double>& mis_abs ):
00021    main_trie( NULL, counter_of_emptyset),mis_abs(mis_abs)
00022 {
00023 }
00024 
00030 void MSApriori_Trie::insert_frequent_items( const set< pair<itemtype, unsigned long> >& counters )
00031 {
00032    for(set< pair<itemtype, unsigned long> >::iterator it = counters.begin(); it != counters.end(); it++)
00033       main_trie.add_empty_state( (*it).first, (*it).second );
00034    if( !main_trie.edgevector.empty() ) main_trie.maxpath = 1;
00035 }
00036 
00040 void MSApriori_Trie::candidate_generation( const itemtype& frequent_size )
00041 {
00042    if( frequent_size == 1 ) candidate_generation_two();
00043    else if( main_trie.maxpath == frequent_size )
00044    {
00045       set<itemtype> maybe_candidate;
00046       candidate_generation_assist( &main_trie, frequent_size-1, maybe_candidate );
00047    }
00048 }
00049 
00057 void MSApriori_Trie::find_candidate( const vector<itemtype>& basket, const itemtype candidate_size, 
00058                                      const unsigned long counter_incr)
00059 {
00060    if( candidate_size != 2 ) 
00061       main_trie.find_candidate( basket, candidate_size, basket.begin(), counter_incr );
00062    else find_candidate_two( basket, counter_incr );    
00063 }
00064 
00068 void MSApriori_Trie::delete_infrequent( const itemtype candidate_size )
00069 {
00070    if( candidate_size != 2 )
00071    {
00072       for( vector<Edge>::iterator itEdge =  main_trie.edgevector.begin(); 
00073            itEdge !=  main_trie.edgevector.end(); itEdge++ )
00074          if( (*itEdge).subtrie->maxpath == candidate_size - 1 )
00075               (*itEdge).subtrie->delete_infrequent( mis_abs[(*itEdge).label], candidate_size - 2 );
00076    }
00077    else delete_infrequent_two( );
00078 }
00079 
00084 void MSApriori_Trie::association( const double min_conf, Input_Output_Manager& input_output_manager ) const
00085 {
00086    input_output_manager << "\nAssociation rules:\ncondition ==> consequence (confidence, occurrence)\n";
00087    set<itemtype> consequence_part;
00088    assoc_rule_assist( min_conf, &main_trie, consequence_part, input_output_manager );
00089 }
00090 
00091 itemtype MSApriori_Trie::longest_path() const
00092 {
00093    return main_trie.maxpath;
00094 }
00095 
00096 void MSApriori_Trie::write_content_to_file( Input_Output_Manager& input_output_manager ) const
00097 {
00098    input_output_manager<< "Frequent 0-itemsets:\nitemset (occurrence)\n";
00099    input_output_manager<< "{} ("<< main_trie.counter << ')' << endl;
00100    for( itemtype item_size = 1; item_size < main_trie.maxpath+1; item_size++ )
00101    {
00102       input_output_manager<< "Frequent " << item_size << "-itemsets:\nitemset (occurrence)\n";
00103       set<itemtype> frequent_itemset;
00104       write_content_to_file_assist( input_output_manager, &main_trie, item_size, frequent_itemset );
00105    }
00106 }
00107 
00108 void MSApriori_Trie::show_content_preorder( ) const
00109 {
00110    main_trie.show_content_preorder( );
00111 }
00112 
00113 
00114 MSApriori_Trie::~MSApriori_Trie()
00115 {
00116 }
00117 
00122 bool MSApriori_Trie::is_all_subset_frequent( const set<itemtype>& maybe_candidate ) const
00123 {
00124    if( maybe_candidate.size() < 3) return true;   // because of the candidate generation method!
00125    else
00126    {
00127       set<itemtype>                 temp_itemset(maybe_candidate);
00128       set<itemtype>::const_iterator item_it = --(--maybe_candidate.end());
00129       set<itemtype>::const_iterator it_lower_bound = ++maybe_candidate.begin();
00130       while ( item_it != it_lower_bound )
00131       {
00132          item_it--;
00133          temp_itemset.erase( *item_it );
00134          if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) return false;
00135          temp_itemset.insert( *item_it );
00136       }
00137       if( mis_abs[*it_lower_bound] ==  mis_abs[*maybe_candidate.begin()] )
00138       {
00139          temp_itemset.erase( *maybe_candidate.begin() );
00140          if( main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) return true;
00141          else return false;
00142       }
00143       else return true;
00144    }
00145 }
00146 
00152 void MSApriori_Trie::find_candidate_two( const vector<itemtype>& basket, const unsigned long counter )
00153 {
00154    if(basket.size() > 1)
00155    {
00156       vector<itemtype>::const_iterator it1_basket,
00157                                        it2_basket;
00158 
00159       for( it1_basket = basket.begin(); it1_basket != basket.end()-1; it1_basket++)
00160          for( it2_basket = it1_basket+1; it2_basket != basket.end(); it2_basket++)
00161             temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1] += counter;
00162    }
00163 }
00164 
00165 void MSApriori_Trie::candidate_generation_two( )
00166 {
00167    if( !mis_abs.empty() )
00168    {
00169       main_trie.maxpath = 2;
00170       temp_counter_array.reserve( mis_abs.size() - 1 );
00171       temp_counter_array.resize( mis_abs.size() - 1 );
00172       for( vector<Edge>::size_type stateIndex = 0; stateIndex <  mis_abs.size() - 1; stateIndex++ )
00173       {
00174          temp_counter_array[stateIndex].reserve( mis_abs.size() - 1 - stateIndex );
00175          temp_counter_array[stateIndex].resize( mis_abs.size() - 1 - stateIndex, 0);
00176       }
00177    }
00178 }
00179 
00180 
00181 void MSApriori_Trie::candidate_generation_assist( Trie* trie, const itemtype distance_from_generator,
00182                                                   set<itemtype>& maybe_candidate)
00183 {
00184    vector<Edge>::iterator itEdge = trie->edgevector.begin();
00185    if( distance_from_generator )
00186    {
00187       for( ; itEdge != trie->edgevector.end(); itEdge++ )
00188       if( (*itEdge).subtrie->maxpath + 1 >= distance_from_generator )
00189       {
00190          maybe_candidate.insert((*itEdge).label);
00191          candidate_generation_assist((*itEdge).subtrie, distance_from_generator - 1, maybe_candidate );
00192          maybe_candidate.erase((*itEdge).label);
00193       }
00194    }
00195    else
00196    {
00197       vector<Edge>::iterator itEdge2;
00198       Trie* toExtend;
00199       for( ; itEdge != trie->edgevector.end(); itEdge++ )
00200       {
00201          maybe_candidate.insert((*itEdge).label);
00202          toExtend = (*itEdge).subtrie;
00203          for( itEdge2 = itEdge + 1; itEdge2 != trie->edgevector.end(); itEdge2++ )
00204          {
00205             maybe_candidate.insert( (*itEdge2).label );
00206             if( is_all_subset_frequent( maybe_candidate) )
00207                toExtend->add_empty_state( (*itEdge2).label );
00208             maybe_candidate.erase( (*itEdge2).label );
00209          }
00210          if( !toExtend->edgevector.empty()) toExtend->maxpath = 1;
00211          (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector);  // we know that state toExtend will not have any more children!
00212           maybe_candidate.erase((*itEdge).label);
00213       }
00214       trie->max_path_set();
00215    }
00216 }
00217 
00218 void MSApriori_Trie::delete_infrequent_two(  )
00219 {
00220    vector<Edge>::size_type stateIndex_1,
00221                             stateIndex_2;
00222    vector<Edge>::iterator it;
00223    for( stateIndex_1 = 0; stateIndex_1 < mis_abs.size()-1; stateIndex_1++ )
00224    {
00225       it = lower_bound(main_trie.edgevector.begin(), 
00226                        main_trie.edgevector.end(), stateIndex_1, Edge_point_less);
00227       for( stateIndex_2 = 0; stateIndex_2 < mis_abs.size() - 1 - stateIndex_1; stateIndex_2++ )
00228         if( temp_counter_array[stateIndex_1][stateIndex_2] > mis_abs[stateIndex_1] )
00229            (*it).subtrie->add_empty_state( stateIndex_1 + stateIndex_2 + 1,
00230                                             temp_counter_array[stateIndex_1][stateIndex_2] );
00231       temp_counter_array[stateIndex_1].clear();
00232       vector<unsigned long>().swap(temp_counter_array[stateIndex_1]);   
00233       if( !(*it).subtrie->edgevector.empty() ) (*it).subtrie->maxpath = 1;      
00234    }
00235    temp_counter_array.clear();
00236    vector< vector<unsigned long> >().swap(temp_counter_array);            
00237    main_trie.max_path_set();
00238 }
00239 
00240 void MSApriori_Trie::assoc_rule_find( const double min_conf, set<itemtype>& condition_part, set<itemtype>& consequence_part, 
00241                                     const unsigned long union_support, Input_Output_Manager& input_output_manager ) const
00242 {
00243    itemtype                      item;
00244    for( set<itemtype>::const_iterator item_it = consequence_part.begin(); item_it != consequence_part.end(); item_it++)
00245    if( condition_part.empty() || *(--condition_part.end()) < *item_it)
00246    {
00247       item = *item_it;
00248       consequence_part.erase( item );
00249       condition_part.insert( item );
00250       if( union_support > main_trie.is_included(condition_part, condition_part.begin() )->counter * min_conf)
00251       {
00252          input_output_manager<< endl;
00253          input_output_manager.write_out_basket(condition_part);
00254          input_output_manager<< "==> ";
00255          input_output_manager.write_out_basket(consequence_part);
00256          input_output_manager<< "("<<((double) union_support) / main_trie.is_included(condition_part, condition_part.begin())->counter << ", " << union_support << ')';
00257       }
00258       else if( consequence_part.size() > 1 ) assoc_rule_find( min_conf, condition_part, consequence_part, union_support, input_output_manager );
00259       item_it = (consequence_part.insert( item )).first;
00260       condition_part.erase( item );
00261    }
00262 }
00263 
00264 void MSApriori_Trie::assoc_rule_assist( const double min_conf, const Trie* trie, set<itemtype>& consequence_part, Input_Output_Manager& input_output_manager) const
00265 {
00266    if( consequence_part.size() > 1 )
00267    {
00268       set<itemtype> condition_part;
00269       assoc_rule_find( min_conf, condition_part, consequence_part, trie->counter, input_output_manager );
00270    }
00271    for( vector<Edge>::const_iterator it_item = trie->edgevector.begin(); it_item != trie->edgevector.end(); it_item++)
00272    {
00273       consequence_part.insert( (*it_item).label );
00274       assoc_rule_assist( min_conf, (*it_item).subtrie, consequence_part, input_output_manager);
00275       consequence_part.erase( (*it_item).label );
00276    }
00277 }
00278 void MSApriori_Trie::write_content_to_file_assist( Input_Output_Manager& input_output_manager, const Trie* trie, const itemtype distance_from_frequent, set<itemtype>& frequent_itemset ) const
00279 {
00280    if( distance_from_frequent )
00281    {
00282       for( vector<Edge>::const_iterator it = trie->edgevector.begin(); it != trie->edgevector.end(); it++ )
00283       if( (*it).subtrie->maxpath + 1 >= distance_from_frequent )
00284       {
00285          frequent_itemset.insert( (*it).label );
00286          write_content_to_file_assist( input_output_manager, (*it).subtrie, distance_from_frequent -1, frequent_itemset );
00287          frequent_itemset.erase( (*it).label );
00288       }
00289    }
00290    else input_output_manager.write_out_basket_and_counter( frequent_itemset, trie->counter );
00291 }
00292 

Generated on Sun Jun 20 23:41:08 2004 for APRIORI algorithm by doxygen 1.3.5