Main Page | Namespace List | Class List | File List | Class Members | File Members

Input_Output_Manager.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002                           Input_Output_Manager.cpp  -  description
00003                              -------------------
00004     begin                : cs march 10 2004
00005     copyright            : (C) 2004 by Ferenc Bodon
00006     email                : bodon@cs.bme.hu
00007  ***************************************************************************/
00008 
00009 
00014 #include "Input_Output_Manager.hpp"
00015 #include <algorithm>
00016 
00017 Input_Output_Manager::Input_Output_Manager( ifstream& basket_file, 
00018                                             const char* output_file_name ):
00019    ofstream(output_file_name), basket_file(basket_file)
00020 {
00021 }
00022 
00027 int Input_Output_Manager::read_in_a_line( set<itemtype>& basket )
00028 {
00029    if( basket_file.eof() ) return 0;
00030    char          c;
00031    itemtype      pos;
00032 
00033    basket.clear();
00034    do
00035    {
00036       int item = 0;
00037       pos = 0;
00038       basket_file.get(c);
00039       while(basket_file.good() && (c >= '0') && (c <= '9'))
00040       {
00041          item *= 10;
00042          item += int(c)-int('0');
00043          basket_file.get(c);
00044          pos++;
00045       }
00046       if( pos ) basket.insert( (itemtype) item );
00047    }
00048    while( !basket_file.eof() && c != '\n' );
00049    return 1;
00050 }
00051 
00059 countertype Input_Output_Manager::find_frequent_items( 
00060    const double min_supp, vector<countertype>& support_of_items )
00061 {
00062    countertype basket_number = 0;
00063    set<itemtype> basket;
00064    vector< countertype > temp_counter_vector;
00065 
00067    set<itemtype>::iterator it_basket;
00068    while( read_in_a_line( basket ) )
00069    {      
00070       if( !basket.empty() )
00071       {
00072          basket_number++;
00073          for( it_basket = basket.begin(); it_basket != basket.end(); 
00074               it_basket++ )
00075          {
00076             if( *it_basket + 1  > temp_counter_vector.size() )
00077                temp_counter_vector.resize( *it_basket + 1, 0 );
00078             temp_counter_vector[*it_basket]++;
00079          }
00080       }
00081    }
00082 
00084    double min_occurrence = min_supp * (basket_number - 0.5);
00085    vector<countertype>::size_type edgeIndex;
00086 
00087    set< pair<countertype, itemtype> > temp_set;
00088    for( edgeIndex = 0; edgeIndex < temp_counter_vector.size(); 
00089         edgeIndex++ )
00090       if( temp_counter_vector[edgeIndex] > min_occurrence )
00091          temp_set.insert(
00092             pair<countertype, itemtype>(temp_counter_vector[edgeIndex],
00093                                           edgeIndex));
00094 
00095    new_code_inverse.clear();
00096    support_of_items.clear();
00097    for(set< pair<countertype, itemtype> >::iterator it = temp_set.begin();
00098        it != temp_set.end(); it++)
00099    {
00100          new_code_inverse.push_back((*it).second);
00101          support_of_items.push_back((*it).first);
00102    }
00103 //   reverse( new_code_inverse.begin(),new_code_inverse.end() );
00104 //   reverse( support_of_items.begin(), support_of_items.end() );
00105    vector<itemtype>(new_code_inverse).swap(new_code_inverse);
00106    vector<countertype>(support_of_items).swap(support_of_items);
00107 
00108    new_code.reserve(  temp_counter_vector.size() + 1 );
00109    new_code.resize(  temp_counter_vector.size() + 1, 0 );
00110    for( edgeIndex = 0; edgeIndex < new_code_inverse.size(); edgeIndex++ )
00111       new_code[new_code_inverse[edgeIndex]] = edgeIndex+1;
00112    return basket_number;
00113 }
00118 void Input_Output_Manager::basket_recode( 
00119    const set<itemtype>& original_basket, vector<itemtype>& new_basket )
00120 {
00121    new_basket.clear();
00122    for( set<itemtype>::const_iterator it_basket = original_basket.begin(); 
00123         it_basket != original_basket.end(); it_basket++ )
00124      if( new_code[*it_basket] ) new_basket.push_back( new_code[*it_basket]-1 );
00125    sort( new_basket.begin(), new_basket.end() );     
00126 }
00127 
00128 void Input_Output_Manager::write_out_basket(const set<itemtype>& basket)
00129 {
00130    for( set<itemtype>::const_iterator it_item = basket.begin(); 
00131         it_item != basket.end(); it_item++)
00132    {
00133       operator<<( new_code_inverse[*it_item] );
00134       put(' ');
00135    }
00136 }
00137 
00138 void Input_Output_Manager::write_out_basket_and_counter(
00139    const set<itemtype>& itemset, const countertype counter)
00140 {
00141    for( set<itemtype>::const_iterator it_item = itemset.begin(); 
00142         it_item != itemset.end(); it_item++)
00143    {
00144       operator<<( new_code_inverse[*it_item] );
00145       put(' ');
00146    }
00147    put('(');
00148    operator<<(counter);
00149    write(")\n",2);
00150 }
00151 
00152 void Input_Output_Manager::rewind()
00153 {
00154    basket_file.clear();
00155    basket_file.seekg(0, ios::beg);
00156 }
00157 
00158 Input_Output_Manager::~Input_Output_Manager()
00159 {
00160    close();
00161 }

Generated on Fri Mar 11 14:48:06 2005 for APRIORI algorithm by  doxygen 1.3.9.1