Main Page | Namespace List | Class List | File List | Class Members | File Members

Apriori.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002                           apriori.cpp  -  description
00003                              -------------------
00004     begin                : cs dec 26 2002
00005     copyright            : (C) 2002 by Ferenc Bodon
00006     email                : bodon@cs.bme.hu
00007  ***************************************************************************/
00008 
00009 #include "Apriori.hpp"
00010 #include <iostream>
00011 #include <vector>
00012 #include <set>
00013 #include <cmath>   //because of the ceil function
00014 
00015 using namespace std;
00016 
00021 void Apriori::support( const itemtype& candidate_size )
00022 {
00023    set<itemtype> basket;
00024    vector<itemtype> basket_v;
00025    if( store_input )
00026    {
00027       if (candidate_size == 2)
00028       {
00029          while( input_output_manager.read_in_a_line( basket ) )
00030          {
00031             input_output_manager.basket_recode( basket, basket_v );
00032             if (basket_v.size()>1) reduced_baskets[basket_v]++;
00033          }
00034       }
00035       for (map<vector<itemtype>, countertype>::iterator it = 
00036               reduced_baskets.begin(); it!=reduced_baskets.end();it++)
00037          apriori_trie->find_candidate(it->first,candidate_size,it->second);
00038    }
00039    else while( input_output_manager.read_in_a_line( basket ) )
00040    {
00041       input_output_manager.basket_recode(basket, basket_v);
00042           apriori_trie->find_candidate(basket_v,candidate_size);
00043    }
00044 }
00045 
00055 void Apriori::APRIORI_alg( const double min_supp, 
00056                            const bool quiet, 
00057                            const unsigned int size_threshold )
00058 {
00059    countertype basket_number;   
00060    if(!quiet) cout<<endl<<"\t\tFinding frequent itemsets..."<<endl<<endl;
00061    itemtype candidate_size=1;
00062    if(!quiet)
00063    {
00064       cout<<endl<<"Determining the support of the items";
00065       cout<<" and deleting infrequent ones!"<<endl;
00066    }
00067    vector<countertype> support_of_items;
00068    basket_number = input_output_manager.find_frequent_items( 
00069       min_supp, support_of_items );
00070    input_output_manager<< "Frequent 0-itemsets:\nitemset (occurrence)\n";
00071    input_output_manager<< "{} ("<< basket_number << ")\n";
00072    input_output_manager<< "Frequent " << 1;
00073    input_output_manager<< "-itemsets:\nitemset (occurrence)\n";
00074    set<itemtype> temp_set;
00075    for(vector<countertype>::size_type index = 0;
00076       index < support_of_items.size(); index++)
00077    {
00078       temp_set.insert(index);
00079       input_output_manager.write_out_basket_and_counter( 
00080          temp_set, support_of_items[index] );
00081       temp_set.erase(temp_set.begin());
00082    }
00083 
00084    apriori_trie = new Apriori_Trie( basket_number );
00085    apriori_trie->insert_frequent_items( support_of_items );
00086 
00087 //   apriori_trie->show_content();
00088 //   getchar();
00089    double min_supp_abs = min_supp * (basket_number - 0.5);
00090 //   apriori_trie->show_content();
00091 //   getchar();
00092    candidate_size++;
00093    if(!quiet) 
00094    {
00095       cout<<endl<<"Genarating "<<candidate_size;
00096       cout<<"-itemset candidates!"<<endl;
00097    }
00098    apriori_trie->candidate_generation(candidate_size-1,
00099                                       input_output_manager);
00100 //   apriori_trie->show_content();
00101 //   getchar();
00102    while( apriori_trie->is_there_any_candidate() )
00103    {
00104       input_output_manager.rewind();
00105       if(!quiet)
00106       {
00107          cout<<"Determining the support of the "<<candidate_size;
00108          cout<<"-itemset candidates!"<<endl;
00109       }
00110       support( candidate_size );
00111 //      apriori_trie->show_content();
00112 //      getchar();
00113       if(!quiet) cout<<"Deleting infrequent itemsets!"<<endl;
00114       apriori_trie->delete_infrequent(min_supp_abs, candidate_size);
00115 //      apriori_trie->show_content();
00116 //      getchar();
00117       if (candidate_size == size_threshold )
00118       {
00119          if(!quiet) cout<<"Size threshold is reached!"<<endl;
00120          break;
00121       }
00122       candidate_size++;
00123       if( !quiet )
00124       {
00125          cout<<endl<<"Genarating "<<candidate_size;
00126          cout<<"-itemset candidates!"<<endl;
00127       }
00128       input_output_manager<< "Frequent " << candidate_size-1;
00129       input_output_manager<< "-itemsets:\nitemset (occurrence)\n";
00130       apriori_trie->candidate_generation(candidate_size-1,
00131                                          input_output_manager);
00132 //      apriori_trie->show_content_preorder();
00133 //      getchar();
00134    }
00135    if(!quiet) cout<<endl<<"Mining is done!"<<endl;
00136 }

Generated on Fri Mar 11 14:48:06 2005 for APRIORI algorithm by  doxygen 1.3.9.1