Main Page | Namespace List | Class List | File List | Class Members | File Members

Input_Output_Manager.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002                           Input_Output_Manager.cpp  -  description
00003                              -------------------
00004     begin                : cs march 10 2004
00005     copyright            : (C) 2004 by Ferenc Bodon
00006     email                : bodon@cs.bme.hu
00007  ***************************************************************************/
00008 
00009 
00014 #include "Input_Output_Manager.hpp"
00015 
00016 Input_Output_Manager::Input_Output_Manager( ifstream& basket_file, 
00017                                             const char* output_file_name ):
00018    ofstream(output_file_name), basket_file(basket_file)
00019 {
00020 }
00021 
00026 int Input_Output_Manager::read_in_a_line( set<itemtype>& basket )
00027 {
00028    if( basket_file.eof() ) return 0;
00029    char          c;
00030    itemtype      pos;
00031 
00032    basket.clear();
00033    do
00034    {
00035       int item = 0;
00036       pos = 0;
00037       basket_file.get(c);
00038       while(basket_file.good() && (c >= '0') && (c <= '9'))
00039       {
00040          item *= 10;
00041          item += int(c)-int('0');
00042          basket_file.get(c);
00043          pos++;
00044       }
00045       if( pos ) basket.insert( (itemtype) item );
00046    }
00047    while( !basket_file.eof() && c != '\n' );
00048    return 1;
00049 }
00050 
00058 unsigned long Input_Output_Manager::find_frequent_items( 
00059    const double min_supp, vector<unsigned long>& support_of_items )
00060 {
00061    unsigned long basket_number = 0;
00062    set<itemtype> basket;
00063    vector< unsigned long > temp_counter_vector;
00064 
00066    set<itemtype>::iterator it_basket;
00067    while( read_in_a_line( basket ) )
00068    {      
00069       if( !basket.empty() )
00070       {
00071          basket_number++;
00072          for( it_basket = basket.begin(); it_basket != basket.end(); 
00073               it_basket++ )
00074          {
00075             if( *it_basket + 1  > temp_counter_vector.size() )
00076                temp_counter_vector.resize( *it_basket + 1, 0 );
00077             temp_counter_vector[*it_basket]++;
00078          }
00079       }
00080    }
00081 
00083    double long min_occurrence = min_supp * (basket_number - 0.5);
00084    vector<unsigned long>::size_type edgeIndex;
00085 
00086    set< pair<unsigned long, itemtype> > temp_set;
00087    for( itemtype edgeIndex = 0; edgeIndex < temp_counter_vector.size(); 
00088         edgeIndex++ )
00089       if( temp_counter_vector[edgeIndex] > min_occurrence )
00090          temp_set.insert(
00091             pair<unsigned long, itemtype>(temp_counter_vector[edgeIndex],
00092                                           edgeIndex));
00093 
00094    new_code_inverse.clear();
00095    support_of_items.clear();
00096    for(set< pair<unsigned long, itemtype> >::iterator it = temp_set.begin();
00097        it != temp_set.end(); it++)
00098    {
00099          new_code_inverse.push_back((*it).second);
00100          support_of_items.push_back((*it).first);
00101    }
00102 //   reverse( new_code_inverse.begin(),new_code_inverse.end() );
00103 //   reverse( support_of_items.begin(), support_of_items.end() );
00104    vector<itemtype>(new_code_inverse).swap(new_code_inverse);
00105    vector<unsigned long >(support_of_items).swap(support_of_items);
00106 
00107    new_code.reserve(  temp_counter_vector.size() + 1 );
00108    new_code.resize(  temp_counter_vector.size() + 1, 0 );
00109    for( edgeIndex = 0; edgeIndex < new_code_inverse.size(); edgeIndex++ )
00110       new_code[new_code_inverse[edgeIndex]] = edgeIndex+1;
00111    return basket_number;
00112 }
00117 void Input_Output_Manager::basket_recode( 
00118    const set<itemtype>& original_basket, vector<itemtype>& new_basket )
00119 {
00120    new_basket.clear();
00121    for( set<itemtype>::iterator it_basket = original_basket.begin(); 
00122         it_basket != original_basket.end(); it_basket++ )
00123      if( new_code[*it_basket] ) new_basket.push_back( new_code[*it_basket]-1 );
00124    sort( new_basket.begin(), new_basket.end() );     
00125 }
00126 
00127 void Input_Output_Manager::write_out_basket(const set<itemtype>& basket)
00128 {
00129    for( set<itemtype>::const_iterator it_item = basket.begin(); 
00130         it_item != basket.end(); it_item++)
00131    {
00132       operator<<( new_code_inverse[*it_item] );
00133       put(' ');
00134    }
00135 }
00136 
00137 void Input_Output_Manager::write_out_basket_and_counter(
00138    const set<itemtype>& itemset, const unsigned long counter)
00139 {
00140    for( set<itemtype>::const_iterator it_item = itemset.begin(); 
00141         it_item != itemset.end(); it_item++)
00142    {
00143       operator<<( new_code_inverse[*it_item] );
00144       put(' ');
00145    }
00146    put('(');
00147    operator<<(counter);
00148    write(")\n",2);
00149 }
00150 
00151 void Input_Output_Manager::rewind()
00152 {
00153    basket_file.clear();
00154    basket_file.seekg(0, ios::beg);
00155 }
00156 
00157 Input_Output_Manager::~Input_Output_Manager()
00158 {
00159    close();
00160 }

Generated on Mon Aug 16 19:59:29 2004 for APRIORI algorithm by doxygen 1.3.5