Main Page | Namespace List | Class List | File List | Class Members | File Members

Apriori_Trie.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002                           Apriori_Trie.cpp  -  description
00003                              -------------------
00004     begin                : cs dec 26 2002
00005     copyright            : (C) 2002 by Ferenc Bodon
00006     email                : bodon@mit.bme.hu
00007  ***************************************************************************/
00008 
00009 
00010 #include "Apriori_Trie.hpp"
00011 #include <cstdlib>
00012 #include <algorithm>
00013 #include <iostream>
00014 
00019 Apriori_Trie::Apriori_Trie(const unsigned long counter_of_emptyset ):
00020    main_trie(counter_of_emptyset)
00021 {
00022 }
00023 
00028 void Apriori_Trie::insert_frequent_items(
00029    const vector<unsigned long>& counters )
00030 {
00031    for(vector<unsigned long>::size_type item_index = 0; 
00032        item_index < counters.size(); item_index++)
00033       main_trie.add_empty_state( item_index, counters[item_index] );
00034    if( !main_trie.edgevector.empty() ) main_trie.maxpath = 1;
00035 }
00036 
00041 void Apriori_Trie::candidate_generation( const itemtype& frequent_size )
00042 {
00043    if( frequent_size == 1 ) candidate_generation_two();
00044    else if( main_trie.maxpath == frequent_size )
00045    {
00046       set<itemtype> maybe_candidate;
00047       candidate_generation_assist( &main_trie, frequent_size-1, 
00048                                    maybe_candidate );
00049    }
00050 }
00051 
00059 void Apriori_Trie::find_candidate( const vector<itemtype>& basket, 
00060                                    const itemtype candidate_size, 
00061                                    const unsigned long counter_incr)
00062 {
00063    if( candidate_size != 2 ) 
00064       if ( candidate_size<basket.size()+1 )
00065          main_trie.find_candidate( basket.end()-candidate_size+1, candidate_size, 
00066                                    basket.begin(), counter_incr );
00067       else;
00068    else find_candidate_two( basket, counter_incr );    
00069 }
00070 
00075 void Apriori_Trie::delete_infrequent( const double min_occurrence, 
00076                                       const itemtype candidate_size )
00077 {
00078    if( candidate_size != 2 ) 
00079       main_trie.delete_infrequent( min_occurrence, candidate_size - 1 );
00080    else delete_infrequent_two( min_occurrence );
00081 }
00082 
00087 void Apriori_Trie::association( 
00088    const double min_conf, Input_Output_Manager& input_output_manager ) const
00089 {
00090    input_output_manager << "\nAssociation rules:\ncondition ==>";
00091    input_output_manager << "consequence (confidence, occurrence)\n";
00092    set<itemtype> consequence_part;
00093    assoc_rule_assist( min_conf, &main_trie, 
00094                       consequence_part, input_output_manager );
00095 }
00096 
00097 itemtype Apriori_Trie::longest_path() const
00098 {
00099    return main_trie.maxpath;
00100 }
00101 
00102 void Apriori_Trie::write_content_to_file( 
00103    Input_Output_Manager& input_output_manager ) const
00104 {
00105    input_output_manager<< "Frequent 0-itemsets:\nitemset (occurrence)\n";
00106    input_output_manager<< "{} ("<< main_trie.counter << ")\n";
00107    for( itemtype item_size = 1; item_size < main_trie.maxpath+1; item_size++ )
00108    {
00109       input_output_manager<< "Frequent " << item_size;
00110       input_output_manager << "-itemsets:\nitemset (occurrence)\n";
00111       set<itemtype> frequent_itemset;
00112       write_content_to_file_assist( input_output_manager, 
00113                                     &main_trie, item_size, frequent_itemset );
00114    }
00115 }
00116 
00117 void Apriori_Trie::show_content_preorder( ) const
00118 {
00119    main_trie.show_content_preorder( );
00120 }
00121 
00122 
00123 Apriori_Trie::~Apriori_Trie()
00124 {
00125 }
00126 
00131 bool Apriori_Trie::is_all_subset_frequent( 
00132    const set<itemtype>& maybe_candidate ) const
00133 {
00134    if( maybe_candidate.size() < 3) return true; // because of the 
00135                                                 // candidate generation method!
00136    else
00137    {
00138       set<itemtype>                 temp_itemset(maybe_candidate);
00139       set<itemtype>::const_iterator item_it = --(--maybe_candidate.end());
00140       do
00141       {
00142          item_it--;
00143          temp_itemset.erase( *item_it );
00144          if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) 
00145             return false;
00146          temp_itemset.insert( *item_it );
00147       }
00148       while ( item_it != maybe_candidate.begin() );
00149       return true;
00150    }
00151 }
00152 
00153 void Apriori_Trie::candidate_generation_two()
00154 {
00155    if( !main_trie.edgevector.empty() )
00156    {
00157       main_trie.maxpath = 2;
00158       temp_counter_array.reserve(main_trie.edgevector.size()-1);
00159       temp_counter_array.resize(main_trie.edgevector.size()-1);
00160       for( vector<Edge>::size_type stateIndex = 0; 
00161            stateIndex < main_trie.edgevector.size()-1; stateIndex++ )
00162       {
00163          temp_counter_array[stateIndex].reserve(
00164             main_trie.edgevector.size()-1-stateIndex );
00165          temp_counter_array[stateIndex].resize(
00166             main_trie.edgevector.size()-1-stateIndex, 0);
00167       }
00168    }
00169 }
00170 
00171 void Apriori_Trie::candidate_generation_assist( 
00172    Trie* trie, 
00173    const itemtype distance_from_generator,
00174    set<itemtype>& maybe_candidate)
00175 {
00176    itemtype temp_maxpath = trie->maxpath;
00177    vector<Edge>::iterator itEdge = trie->edgevector.begin();
00178    if( distance_from_generator )
00179    {
00180       for( ; itEdge != trie->edgevector.end(); itEdge++ )
00181       if( (*itEdge).subtrie->maxpath + 1 >= distance_from_generator )
00182       {
00183          maybe_candidate.insert((*itEdge).label);
00184          candidate_generation_assist(
00185             (*itEdge).subtrie, distance_from_generator - 1, maybe_candidate );
00186          maybe_candidate.erase((*itEdge).label);
00187          if( temp_maxpath < (*itEdge).subtrie->maxpath + 1 )
00188             temp_maxpath = (*itEdge).subtrie->maxpath + 1;
00189       }
00190       if( trie->maxpath < temp_maxpath )
00191          trie->maxpath = temp_maxpath;
00192    }
00193    else
00194    {
00195       vector<Edge>::iterator itEdge2;
00196       Trie* toExtend;
00197       for( ; itEdge != trie->edgevector.end(); itEdge++ )
00198       {
00199          maybe_candidate.insert((*itEdge).label);
00200          toExtend = (*itEdge).subtrie;
00201          for( itEdge2 = itEdge + 1; 
00202               itEdge2 != trie->edgevector.end(); itEdge2++ )
00203          {
00204             maybe_candidate.insert( (*itEdge2).label );
00205             if( is_all_subset_frequent( maybe_candidate) )
00206                toExtend->add_empty_state( (*itEdge2).label );
00207             maybe_candidate.erase( (*itEdge2).label );
00208          }
00209          if( !toExtend->edgevector.empty())
00210          {
00211             toExtend->maxpath = 1;
00212             trie->maxpath=2;    
00213          }    
00214   // we know that state toExtend will not have any more children!
00215          (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector);  
00216           maybe_candidate.erase((*itEdge).label);
00217       }
00218 
00219    }
00220 }
00221 
00228 void Apriori_Trie::find_candidate_two( const vector<itemtype>& basket, 
00229                                        const unsigned long counter )
00230 {
00231    if( basket.size() > 1)
00232    {
00233       vector<itemtype>::const_iterator it1_basket,
00234                                        it2_basket;
00235 
00236       for( it1_basket = basket.begin(); it1_basket != basket.end()-1; 
00237            it1_basket++)
00238          for( it2_basket = it1_basket+1; it2_basket != basket.end(); 
00239               it2_basket++)
00240             temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1] 
00241                += counter;
00242    }
00243 }
00244 
00248 void Apriori_Trie::delete_infrequent_two( const double min_occurrence )
00249 {
00250    vector<Edge>::size_type stateIndex_1,
00251                             stateIndex_2;
00252    for( stateIndex_1 = 0; stateIndex_1 < main_trie.edgevector.size()-1; 
00253         stateIndex_1++ )
00254    {
00255       for( stateIndex_2 = 0; 
00256            stateIndex_2 < main_trie.edgevector.size() - 1 - stateIndex_1; 
00257            stateIndex_2++ )
00258       {
00259         if( temp_counter_array[stateIndex_1][stateIndex_2] > min_occurrence )
00260            main_trie.edgevector[stateIndex_1].subtrie->add_empty_state( 
00261               stateIndex_1 + stateIndex_2 + 1,
00262               temp_counter_array[stateIndex_1][stateIndex_2] );
00263       }
00264       if( !main_trie.edgevector[stateIndex_1].subtrie->edgevector.empty() )
00265       {
00266          main_trie.edgevector[stateIndex_1].subtrie->maxpath = 1;
00267          main_trie.maxpath = 2;
00268       }
00269       temp_counter_array[stateIndex_1].clear();
00271       vector<unsigned long>().swap(temp_counter_array[stateIndex_1]);  
00272    }
00273    temp_counter_array.clear();
00275    vector< vector<unsigned long> >().swap(temp_counter_array);
00276 }
00277 
00278 void Apriori_Trie::assoc_rule_find( 
00279    const double min_conf, set<itemtype>& condition_part, 
00280    set<itemtype>& consequence_part, const unsigned long union_support, 
00281    Input_Output_Manager& input_output_manager ) const
00282 {
00283    itemtype item;
00284    for( set<itemtype>::const_iterator item_it = consequence_part.begin(); 
00285         item_it != consequence_part.end(); item_it++)
00286    if( condition_part.empty() || *(--condition_part.end()) < *item_it)
00287    {
00288       item = *item_it;
00289       consequence_part.erase( item );
00290       condition_part.insert( item );
00291       if( union_support > main_trie.is_included(
00292              condition_part, condition_part.begin() )->counter * min_conf )
00293       {
00294          input_output_manager<< '\n';
00295          input_output_manager.write_out_basket(condition_part);
00296          input_output_manager<< "==> ";
00297          input_output_manager.write_out_basket(consequence_part);
00298          input_output_manager<< "("<<((double) union_support) 
00299             / main_trie.is_included(condition_part, 
00300                                     condition_part.begin())->counter;
00301          input_output_manager<< ", " << union_support << ')';
00302       }
00303       else if( consequence_part.size() > 1 ) 
00304          assoc_rule_find( min_conf, condition_part, 
00305                           consequence_part, union_support, 
00306                           input_output_manager );
00307       item_it = (consequence_part.insert( item )).first;
00308       condition_part.erase( item );
00309    }
00310 }
00311 
00312 void Apriori_Trie::assoc_rule_assist( 
00313    const double min_conf, const Trie* trie, 
00314    set<itemtype>& consequence_part, 
00315    Input_Output_Manager& input_output_manager) const
00316 {
00317    if( consequence_part.size() > 1 )
00318    {
00319       set<itemtype> condition_part;
00320       assoc_rule_find( min_conf, condition_part, consequence_part, 
00321                        trie->counter, input_output_manager );
00322    }
00323    for( vector<Edge>::const_iterator it_item = trie->edgevector.begin(); 
00324         it_item != trie->edgevector.end(); it_item++)
00325    {
00326       consequence_part.insert( (*it_item).label );
00327       assoc_rule_assist( min_conf, (*it_item).subtrie, consequence_part, 
00328                          input_output_manager);
00329       consequence_part.erase( (*it_item).label );
00330    }
00331 }
00332 
00333 
00334 void Apriori_Trie::write_content_to_file_assist( 
00335    Input_Output_Manager& input_output_manager, const Trie* trie, 
00336    const itemtype distance_from_frequent, 
00337    set<itemtype>& frequent_itemset ) const
00338 {
00339    if( distance_from_frequent )
00340    {
00341       for( vector<Edge>::const_iterator it = trie->edgevector.begin(); 
00342            it != trie->edgevector.end(); it++ )
00343       if( (*it).subtrie->maxpath + 1 >= distance_from_frequent )
00344       {
00345          frequent_itemset.insert( (*it).label );
00346          write_content_to_file_assist( input_output_manager, 
00347                                        (*it).subtrie, 
00348                                        distance_from_frequent -1, 
00349                                        frequent_itemset );
00350          frequent_itemset.erase( (*it).label );
00351       }
00352    }
00353    else 
00354       input_output_manager.write_out_basket_and_counter( frequent_itemset, 
00355                                                          trie->counter );
00356 }
00357 

Generated on Mon Aug 16 19:59:29 2004 for APRIORI algorithm by doxygen 1.3.5