Main Page | Namespace List | Class List | File List | Class Members | File Members

Input_Output_Manager.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002                           Input_Output_Manager.cpp  -  description
00003                              -------------------
00004     begin                : cs march 10 2004
00005     copyright            : (C) 2004 by Ferenc Bodon
00006     email                : bodon@cs.bme.hu
00007  ***************************************************************************/
00008 
00009 
00014 #include "Input_Output_Manager.hpp"
00015 #include <iostream>
00016 
00017 Input_Output_Manager::Input_Output_Manager( ifstream& basket_file, const char* output_file_name ):ofstream(output_file_name), basket_file(basket_file)
00018 {
00019 }
00020 
00025 int Input_Output_Manager::read_in_a_line( set<itemtype>& basket )
00026 {
00027    if( basket_file.eof() ) return 0;
00028    char          c;
00029    itemtype      pos;
00030 
00031    basket.clear();
00032    do
00033    {
00034       int item = 0;
00035       pos = 0;
00036       basket_file.get(c);
00037       while(basket_file.good() && (c >= '0') && (c <= '9'))
00038       {
00039          pos++;
00040          item *= 10;
00041          item += int(c)-int('0');
00042          basket_file.get(c);
00043       }
00044       if( pos ) basket.insert( (itemtype) item );
00045    }
00046    while( c != '\n' && !basket_file.eof() );
00047    return 1;
00048 }
00049 
00057 unsigned long Input_Output_Manager::find_frequent_items( ifstream& mis_file,
00058                           set<pair<itemtype, unsigned long> >& support_of_items, 
00059                           vector<double>& mis_abs)
00060 {
00062    itemtype item;
00063    double mis_value;
00064 
00065    set<pair<double,itemtype> > temp_set;
00066 
00068    vector<double> mis_values_original;
00069    while(!mis_file.eof())
00070    {
00071       mis_file>>item;
00072       mis_file.ignore();
00073       mis_file>>mis_value;
00074       if(mis_file.fail()) break;
00075       if(item + 1> mis_values_original.size())
00076          mis_values_original.resize(item + 1);
00077       mis_values_original[item] = mis_value;
00078       temp_set.insert(pair<double,itemtype>(mis_value,item));
00079    }
00080    
00082    unsigned long basket_number = 0;
00083    set<itemtype> basket;
00084    vector<unsigned long> temp_counter_vector;
00085    set<itemtype>::iterator it_basket;
00086    while( read_in_a_line( basket ) )
00087    {      
00088       if( !basket.empty() )
00089       {
00090          basket_number++;
00091          for( it_basket = basket.begin(); it_basket != basket.end(); it_basket++ )
00092          {
00093             if( *it_basket + 1  > temp_counter_vector.size() )
00094                temp_counter_vector.resize( *it_basket + 1, 0 );
00095             temp_counter_vector[*it_basket]++;
00096          }
00097       }
00098    }
00099    
00101    set<pair<double,itemtype> >::iterator it_s = temp_set.begin();
00102    while( temp_counter_vector[(*it_s).second] < (*it_s).first * (basket_number - 0.5) 
00103           && it_s != temp_set.end() ) it_s++;
00106    new_code_inverse.clear();
00107    double mis_abs_min = (*it_s).first * (basket_number - 0.5),
00108       temp_mis_abs;
00109    while( it_s != temp_set.end() )
00110    {
00111       if (temp_counter_vector[(*it_s).second] > mis_abs_min )
00112       {
00113          temp_mis_abs = (*it_s).first * (basket_number - 0.5);
00114          if( temp_counter_vector[(*it_s).second] > temp_mis_abs )
00115             support_of_items.insert( pair<itemtype, unsigned long>(
00116                                      new_code_inverse.size(), temp_counter_vector[(*it_s).second] ));
00117          new_code_inverse.push_back((*it_s).second);
00118          mis_abs.push_back(temp_mis_abs);
00119       }
00120       it_s++;
00121    }
00122 
00123    vector<itemtype>(new_code_inverse).swap(new_code_inverse);
00124    vector<double>(mis_abs).swap(mis_abs);
00125    set< pair<itemtype, unsigned long> >(support_of_items).swap(support_of_items);
00126 
00127    new_code.reserve(  temp_counter_vector.size() + 1 );
00128    new_code.resize(  temp_counter_vector.size() + 1, 0 );
00129 
00130    for( itemtype edgeIndex = 0; edgeIndex < new_code_inverse.size(); edgeIndex++ )
00131       new_code[new_code_inverse[edgeIndex]] = edgeIndex+1;
00132    return basket_number;
00133 }
00138 void Input_Output_Manager::basket_recode( const set<itemtype>& original_basket, vector<itemtype>& new_basket )
00139 {
00140    new_basket.clear();
00141    for( set<itemtype>::iterator it_basket = original_basket.begin(); it_basket != original_basket.end()
00142                                                                    ; it_basket++ )
00143      if( new_code[*it_basket] ) new_basket.push_back( new_code[*it_basket]-1 );
00144    sort( new_basket.begin(), new_basket.end() );     
00145 }
00146 
00147 void Input_Output_Manager::write_out_basket(const set<itemtype>& basket)
00148 {
00149    for( set<itemtype>::const_iterator it_item = basket.begin(); it_item != basket.end(); it_item++)
00150    {
00151       operator<<( new_code_inverse[*it_item] );
00152       put(' ');
00153    }
00154 }
00155 
00156 void Input_Output_Manager::write_out_basket_and_counter(const set<itemtype>& itemset, const unsigned long counter)
00157 {
00158    for( set<itemtype>::const_iterator it_item = itemset.begin(); it_item != itemset.end(); it_item++)
00159    {
00160       operator<<( new_code_inverse[*it_item] );
00161       put(' ');
00162    }
00163    put('(');
00164    operator<<(counter);
00165    write(")\n",2);
00166 }
00167 
00168 void Input_Output_Manager::rewind()
00169 {
00170    basket_file.clear();
00171    basket_file.seekg(0, ios::beg);
00172 }
00173 
00174 Input_Output_Manager::~Input_Output_Manager()
00175 {
00176    close();
00177 }

Generated on Sun Jun 20 23:41:08 2004 for APRIORI algorithm by doxygen 1.3.5