Main Page | Namespace List | Class List | File List | Class Members | File Members

MSApriori.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002                           MSApriori.cpp  -  description
00003                              -------------------
00004     begin                : cs dec 26 2002
00005     copyright            : (C) 2002 by Ferenc Bodon
00006     email                : bodon@mit.bme.hu
00007  ***************************************************************************/
00008 
00009 #include "MSApriori.hpp"
00010 #include <iostream>
00011 #include <vector>
00012 #include <set>
00013 
00014 
00015 using namespace std;
00016 
00021 void MSApriori::support( const itemtype& candidate_size )
00022 {
00023    set<itemtype> basket;
00024    vector<itemtype> basket_v;
00025    if( store_input )
00026    {
00027       if (candidate_size == 2)
00028       {
00029          while( input_output_manager.read_in_a_line( basket ) )
00030          {
00031             input_output_manager.basket_recode( basket, basket_v );
00032             if (basket_v.size()>1) reduced_baskets[basket_v]++;
00033          }
00034       }
00035       for (map<vector<itemtype>,unsigned long>::iterator it = reduced_baskets.begin();
00036            it!=reduced_baskets.end();it++)
00037          msapriori_trie->find_candidate(it->first,candidate_size,it->second);
00038    }
00039    else while( input_output_manager.read_in_a_line( basket ) )
00040    {
00041       input_output_manager.basket_recode(basket, basket_v);
00042           msapriori_trie->find_candidate(basket_v,candidate_size);
00043    }
00044 }
00045 
00054 MSApriori::MSApriori( ifstream& basket_file, const char*  output_file_name, 
00055                   const bool store_input):
00056                   input_output_manager(basket_file, output_file_name ),
00057                   store_input(store_input)
00058 {
00059 }
00060 
00068 void MSApriori::MSAPRIORI_alg( ifstream& mis_file, const double min_conf, 
00069                                const bool quiet, const unsigned long size_threshold )
00070 {
00071    vector<double> mis_abs;
00072    if(!quiet) cout<<endl<<"\t\tFinding frequent itemsets..."<<endl<<endl;
00073    itemtype candidate_size=1;
00074    itemtype longest_path,longest_path_after_delete=1;
00075    if(!quiet)
00076       cout<<endl<<"Determining the support of the items and deleting infrequent ones!"<<endl;
00077    set< pair<itemtype, unsigned long> > support_of_items;
00078    unsigned long basket_number = input_output_manager.find_frequent_items( mis_file,
00079                                                                            support_of_items, mis_abs );
00080    msapriori_trie = new MSApriori_Trie( basket_number, mis_abs );
00081    msapriori_trie->insert_frequent_items( support_of_items );
00082 
00083 //   apriori_trie->show_content();
00084 //   getchar();
00085    longest_path_after_delete = msapriori_trie->longest_path();
00086 //   apriori_trie->show_content();
00087 //   getchar();
00088    longest_path = msapriori_trie->longest_path();
00089    candidate_size++;
00090    if(!quiet) cout<<endl<<"Genarating "<<candidate_size<<"-itemset candidates!"<<endl;
00091    msapriori_trie->candidate_generation( candidate_size-1 );
00092 //   apriori_trie->show_content();
00093 //   getchar();
00094    while( longest_path < msapriori_trie->longest_path() )
00095    {
00096       input_output_manager.rewind();
00097       if(!quiet) cout<<"Determining the support of the "<<candidate_size<<"-itemset candidates!"<<endl;
00098       support( candidate_size );
00099 //      apriori_trie->show_content();
00100 //      getchar();
00101       if(!quiet) cout<<"Deleting infrequent itemsets!"<<endl;
00102       msapriori_trie->delete_infrequent( candidate_size);
00103       longest_path_after_delete = msapriori_trie->longest_path();
00104 //      apriori_trie->show_content();
00105 //      getchar();
00106       if (candidate_size == size_threshold )
00107       {
00108          if(!quiet) cout<<"Size threshold is reached!"<<endl;
00109          break;
00110       }
00111       longest_path = msapriori_trie->longest_path();
00112       candidate_size++;
00113       if( !quiet ) cout<<endl<<"Genarating "<<candidate_size<<"-itemset candidates!"<<endl;
00114       msapriori_trie->candidate_generation(candidate_size-1);
00115 //      apriori_trie->show_content_preorder();
00116 //      getchar();
00117    }
00118    msapriori_trie->write_content_to_file( input_output_manager );
00119    if (min_conf)
00120    {
00121       if(!quiet) cout<<"\nGenerating association rules...!\n";
00122       msapriori_trie->association( min_conf, input_output_manager );
00123    }
00124    if(!quiet) cout<<"\nMining is done!\n";
00125 }
00126 
00127 MSApriori::~MSApriori()
00128 {
00129    delete msapriori_trie;
00130 }
00131 

Generated on Sun Jun 20 23:41:08 2004 for APRIORI algorithm by doxygen 1.3.5