Main Page | Namespace List | Class List | File List | Class Members | File Members

Apriori.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002                           apriori.cpp  -  description
00003                              -------------------
00004     begin                : Thu Dec 26 2002
00005     copyright            : (C) 2002 by Ferenc Bodon
00006     email                : bodon@mit.bme.hu
00007  ***************************************************************************/
00008 
00009 #include "Apriori.hpp"
00010 #include <iostream>
00011 #include <vector>
00012 #include <set>
00013 #include <cmath>   //because of the ceil function
00014 
00015 using namespace std;
00016 
00021 void Apriori::support( const itemtype& candidate_size )
00022 {
00023    set<itemtype> basket;
00024    vector<itemtype> basket_v;
00025    if( store_input )
00026    {
00027       if (candidate_size == 2)
00028       {
00029          while( input_output_manager.read_in_a_line( basket ) )
00030          {
00031             input_output_manager.basket_recode( basket, basket_v );
00032             if (basket_v.size()>1) reduced_baskets[basket_v]++;
00033          }
00034       }
00035       for (map<vector<itemtype>,unsigned long>::iterator it = 
00036               reduced_baskets.begin(); it!=reduced_baskets.end();it++)
00037          apriori_trie->find_candidate(it->first,candidate_size,it->second);
00038    }
00039    else while( input_output_manager.read_in_a_line( basket ) )
00040    {
00041       input_output_manager.basket_recode(basket, basket_v);
00042           apriori_trie->find_candidate(basket_v,candidate_size);
00043    }
00044 }
00053 Apriori::Apriori( ifstream& basket_file, const char*  output_file_name, 
00054                   const bool store_input):
00055                   input_output_manager(basket_file, output_file_name ),
00056                   store_input(store_input)
00057 {
00058 }
00059 
00069 void Apriori::APRIORI_alg( const double min_supp, const double min_conf, 
00070                            const bool quiet, 
00071                            const unsigned long size_threshold )
00072 {
00073    unsigned long basket_number;   
00074    if(!quiet) cout<<endl<<"\t\tFinding frequent itemsets..."<<endl<<endl;
00075    itemtype candidate_size=1;
00076    itemtype longest_path,longest_path_after_delete=1;
00077    if(!quiet)
00078    {
00079       cout<<endl<<"Determining the support of the items";
00080       cout<<" and deleting infrequent ones!"<<endl;
00081    }
00082    vector<unsigned long> support_of_items;
00083    basket_number = input_output_manager.find_frequent_items( 
00084       min_supp, support_of_items );
00085    apriori_trie = new Apriori_Trie( basket_number );
00086    apriori_trie->insert_frequent_items( support_of_items );
00087 
00088 //   apriori_trie->show_content();
00089 //   getchar();
00090    double min_supp_abs = min_supp * basket_number;
00091    longest_path_after_delete = apriori_trie->longest_path();
00092 //   apriori_trie->show_content();
00093 //   getchar();
00094    longest_path=apriori_trie->longest_path();
00095    candidate_size++;
00096    if(!quiet) 
00097    {
00098       cout<<endl<<"Genarating "<<candidate_size;
00099       cout<<"-itemset candidates!"<<endl;
00100    }
00101    apriori_trie->candidate_generation(candidate_size-1);
00102 //   apriori_trie->show_content();
00103 //   getchar();
00104    while( longest_path<apriori_trie->longest_path() )
00105    {
00106       input_output_manager.rewind();
00107       if(!quiet)
00108       {
00109          cout<<"Determining the support of the "<<candidate_size;
00110          cout<<"-itemset candidates!"<<endl;
00111       }
00112       support( candidate_size );
00113 //      apriori_trie->show_content();
00114 //      getchar();
00115       if(!quiet) cout<<"Deleting infrequent itemsets!"<<endl;
00116       apriori_trie->delete_infrequent(min_supp_abs, candidate_size);
00117       longest_path_after_delete=apriori_trie->longest_path();
00118 //      apriori_trie->show_content();
00119 //      getchar();
00120       if (candidate_size == size_threshold )
00121       {
00122          if(!quiet) cout<<"Size threshold is reached!"<<endl;
00123          break;
00124       }
00125       longest_path=apriori_trie->longest_path();
00126       candidate_size++;
00127       if( !quiet )
00128       {
00129          cout<<endl<<"Genarating "<<candidate_size;
00130          cout<<"-itemset candidates!"<<endl;
00131       }
00132       apriori_trie->candidate_generation(candidate_size-1);
00133 //      apriori_trie->show_content_preorder();
00134 //      getchar();
00135    }
00136    apriori_trie->write_content_to_file( input_output_manager );
00137    if (min_conf)
00138    {
00139       if(!quiet) cout<<"\nGenerating association rules...!\n";
00140       apriori_trie->association( min_conf, input_output_manager );
00141    }
00142    if(!quiet) cout<<"\nMining is done!\n";
00143 }
00144 
00145 Apriori::~Apriori()
00146 {
00147    delete apriori_trie;
00148 }

Generated on Mon Aug 16 19:59:29 2004 for APRIORI algorithm by doxygen 1.3.5