Main Page | Namespace List | Class List | File List | Class Members | File Members

Apriori_Trie.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002                           Apriori_Trie.cpp  -  description
00003                              -------------------
00004     begin                : cs dec 26 2002
00005     copyright            : (C) 2002 by Ferenc Bodon
00006     email                : bodon@cs.bme.hu
00007  ***************************************************************************/
00008 
00009 
00010 #include "Apriori_Trie.hpp"
00011 #include <cstdlib>
00012 #include <algorithm>
00013 #include <iostream>
00014 
00015 
00020 void Apriori_Trie::insert_frequent_items(
00021    const vector<countertype>& counters )
00022 {
00023    for(vector<countertype>::size_type item_index = 0; 
00024        item_index < counters.size(); item_index++)
00025       main_trie.add_empty_state( item_index, counters[item_index] );
00026 }
00027 
00032 void Apriori_Trie::candidate_generation( 
00033    const itemtype& frequent_size,
00034    Input_Output_Manager& input_output_manager )
00035 {
00036    if( frequent_size == 1 ) candidate_generation_two();
00037    else
00038    {
00039       set<itemtype> maybe_candidate;
00040       candidate_generation_assist( &main_trie,
00041                                    maybe_candidate, 
00042                                    input_output_manager );
00043    }
00044 }
00045 
00053 void Apriori_Trie::find_candidate( const vector<itemtype>& basket, 
00054                                    const itemtype candidate_size, 
00055                                    const countertype counter_incr)
00056 {
00057    if( candidate_size != 2 ) 
00058       if ( candidate_size<basket.size()+1 )
00059          main_trie.find_candidate( basket.end()-candidate_size+1, 
00060                                    basket.begin(), counter_incr );
00061       else;
00062    else find_candidate_two( basket, counter_incr );    
00063 }
00064 
00069 void Apriori_Trie::delete_infrequent( const double min_occurrence, 
00070                                       const itemtype candidate_size )
00071 {
00072    if( candidate_size != 2 ) 
00073       main_trie.delete_infrequent( min_occurrence );
00074    else delete_infrequent_two( min_occurrence );
00075 }
00076 
00077 
00082 bool Apriori_Trie::is_all_subset_frequent( 
00083    const set<itemtype>& maybe_candidate ) const
00084 {
00085    if( maybe_candidate.size() < 3) return true; // because of the 
00086                                                 // candidate generation method!
00087    else
00088    {
00089       set<itemtype>                 temp_itemset(maybe_candidate);
00090       set<itemtype>::const_iterator item_it = --(--maybe_candidate.end());
00091       do
00092       {
00093          item_it--;
00094          temp_itemset.erase( *item_it );
00095          if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) 
00096             return false;
00097          temp_itemset.insert( *item_it );
00098       }
00099       while ( item_it != maybe_candidate.begin() );
00100       return true;
00101    }
00102 }
00103 
00104 void Apriori_Trie::candidate_generation_two()
00105 {
00106    if( !main_trie.edgevector.empty() )
00107    {
00108       temp_counter_array.reserve(main_trie.edgevector.size()-1);
00109       temp_counter_array.resize(main_trie.edgevector.size()-1);
00110       for( vector<Edge>::size_type stateIndex = 0; 
00111            stateIndex < main_trie.edgevector.size()-1; stateIndex++ )
00112       {
00113          temp_counter_array[stateIndex].reserve(
00114             main_trie.edgevector.size()-1-stateIndex );
00115          temp_counter_array[stateIndex].resize(
00116             main_trie.edgevector.size()-1-stateIndex, 0);
00117       }
00118    }
00119 }
00120 
00121 void Apriori_Trie::candidate_generation_assist( 
00122    Trie* trie, 
00123    set<itemtype>& maybe_candidate, 
00124    Input_Output_Manager& input_output_manager)
00125 {
00126    vector<Edge>::iterator itEdge = trie->edgevector.begin();
00127    if( (*itEdge).subtrie->edgevector.empty() )
00128    {
00129          vector<Edge>::iterator itEdge2;
00130          Trie* toExtend;
00131          while( itEdge != trie->edgevector.end() )
00132          {
00133             maybe_candidate.insert((*itEdge).label);
00134             toExtend = (*itEdge).subtrie;
00135             input_output_manager.write_out_basket_and_counter( 
00136                maybe_candidate, toExtend->counter );
00137             for( itEdge2 = itEdge + 1; itEdge2 != trie->edgevector.end(); 
00138                  itEdge2++ )
00139             {
00140                maybe_candidate.insert( (*itEdge2).label );
00141                if( is_all_subset_frequent( maybe_candidate) )
00142                   toExtend->add_empty_state( (*itEdge2).label );
00143                maybe_candidate.erase( (*itEdge2).label );
00144             }
00145                 // we know that state toExtend 
00146                 // will not have any more children!
00147             (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector);  
00148             
00149             maybe_candidate.erase((*itEdge).label);
00150             if( toExtend->edgevector.empty() ) 
00151             {
00152                delete (*itEdge).subtrie;
00153                itEdge = trie->edgevector.erase(itEdge);
00154             }
00155             else itEdge++;
00156          }
00157    }
00158    else
00159    {
00160       while( itEdge != trie->edgevector.end() )
00161       {
00162 
00163          maybe_candidate.insert((*itEdge).label);
00164          candidate_generation_assist((*itEdge).subtrie, 
00165                                      maybe_candidate, input_output_manager );
00166          maybe_candidate.erase((*itEdge).label);
00167          if((*itEdge).subtrie->edgevector.empty())
00168          {
00169             delete (*itEdge).subtrie;
00170             itEdge = trie->edgevector.erase(itEdge);
00171          }
00172          else itEdge++;
00173       }
00174    }
00175 }
00176 
00183 void Apriori_Trie::find_candidate_two( const vector<itemtype>& basket, 
00184                                        const countertype counter )
00185 {
00186    if( basket.size() > 1)
00187    {
00188       vector<itemtype>::const_iterator it1_basket,
00189                                        it2_basket;
00190 
00191       for( it1_basket = basket.begin(); it1_basket != basket.end()-1; 
00192            it1_basket++)
00193          for( it2_basket = it1_basket+1; it2_basket != basket.end(); 
00194               it2_basket++)
00195             temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1] 
00196                += counter;
00197    }
00198 }
00199 
00203 void Apriori_Trie::delete_infrequent_two( const double min_occurrence )
00204 {
00205    vector<Edge>::size_type stateIndex_1,
00206                             stateIndex_2;
00207    for( stateIndex_1 = 0; stateIndex_1 < main_trie.edgevector.size()-1; 
00208         stateIndex_1++ )
00209    {
00210       for( stateIndex_2 = 0; 
00211            stateIndex_2 < main_trie.edgevector.size() - 1 - stateIndex_1; 
00212            stateIndex_2++ )
00213       {
00214         if( temp_counter_array[stateIndex_1][stateIndex_2] > min_occurrence )
00215            main_trie.edgevector[stateIndex_1].subtrie->add_empty_state( 
00216               stateIndex_1 + stateIndex_2 + 1, 
00217               temp_counter_array[stateIndex_1][stateIndex_2] );
00218       }
00219       temp_counter_array[stateIndex_1].clear();
00220       vector<countertype>().swap(temp_counter_array[stateIndex_1]);   
00222    }
00223    temp_counter_array.clear();
00224 
00225    vector< vector<countertype> >().swap(temp_counter_array);   
00227    vector<Edge>::iterator it= main_trie.edgevector.begin();
00228    while( it!=main_trie.edgevector.end() )
00229       if((*it).subtrie->edgevector.empty())
00230       {
00231          delete (*it).subtrie;
00232          it = main_trie.edgevector.erase(it);
00233       }
00234       else it++;
00235 }

Generated on Fri Mar 11 14:48:06 2005 for APRIORI algorithm by  doxygen 1.3.9.1