00001
00002
00003
00004
00005
00006
00007
00008
00009
00014 #include "Input_Output_Manager.hpp"
00015
00016 Input_Output_Manager::Input_Output_Manager( ifstream& basket_file,
00017 const char* output_file_name ):
00018 ofstream(output_file_name), basket_file(basket_file)
00019 {
00020 }
00021
00026 int Input_Output_Manager::read_in_a_line( set<itemtype>& basket )
00027 {
00028 if( basket_file.eof() ) return 0;
00029 char c;
00030 itemtype pos;
00031
00032 basket.clear();
00033 do
00034 {
00035 int item = 0;
00036 pos = 0;
00037 basket_file.get(c);
00038 while(basket_file.good() && (c >= '0') && (c <= '9'))
00039 {
00040 item *= 10;
00041 item += int(c)-int('0');
00042 basket_file.get(c);
00043 pos++;
00044 }
00045 if( pos ) basket.insert( (itemtype) item );
00046 }
00047 while( !basket_file.eof() && c != '\n' );
00048 return 1;
00049 }
00050
00058 unsigned long Input_Output_Manager::find_frequent_items(
00059 const double min_supp, vector<unsigned long>& support_of_items )
00060 {
00061 unsigned long basket_number = 0;
00062 set<itemtype> basket;
00063 vector< unsigned long > temp_counter_vector;
00064
00066 set<itemtype>::iterator it_basket;
00067 while( read_in_a_line( basket ) )
00068 {
00069 if( !basket.empty() )
00070 {
00071 basket_number++;
00072 for( it_basket = basket.begin(); it_basket != basket.end();
00073 it_basket++ )
00074 {
00075 if( *it_basket + 1 > temp_counter_vector.size() )
00076 temp_counter_vector.resize( *it_basket + 1, 0 );
00077 temp_counter_vector[*it_basket]++;
00078 }
00079 }
00080 }
00081
00083 double long min_occurrence = min_supp * (basket_number - 0.5);
00084 vector<unsigned long>::size_type edgeIndex;
00085
00086 set< pair<unsigned long, itemtype> > temp_set;
00087 for( itemtype edgeIndex = 0; edgeIndex < temp_counter_vector.size();
00088 edgeIndex++ )
00089 if( temp_counter_vector[edgeIndex] > min_occurrence )
00090 temp_set.insert(
00091 pair<unsigned long, itemtype>(temp_counter_vector[edgeIndex],
00092 edgeIndex));
00093
00094 new_code_inverse.clear();
00095 support_of_items.clear();
00096 for(set< pair<unsigned long, itemtype> >::iterator it = temp_set.begin();
00097 it != temp_set.end(); it++)
00098 {
00099 new_code_inverse.push_back((*it).second);
00100 support_of_items.push_back((*it).first);
00101 }
00102
00103
00104 vector<itemtype>(new_code_inverse).swap(new_code_inverse);
00105 vector<unsigned long >(support_of_items).swap(support_of_items);
00106
00107 new_code.reserve( temp_counter_vector.size() + 1 );
00108 new_code.resize( temp_counter_vector.size() + 1, 0 );
00109 for( edgeIndex = 0; edgeIndex < new_code_inverse.size(); edgeIndex++ )
00110 new_code[new_code_inverse[edgeIndex]] = edgeIndex+1;
00111 return basket_number;
00112 }
00117 void Input_Output_Manager::basket_recode(
00118 const set<itemtype>& original_basket, vector<itemtype>& new_basket )
00119 {
00120 new_basket.clear();
00121 for( set<itemtype>::iterator it_basket = original_basket.begin();
00122 it_basket != original_basket.end(); it_basket++ )
00123 if( new_code[*it_basket] ) new_basket.push_back( new_code[*it_basket]-1 );
00124 sort( new_basket.begin(), new_basket.end() );
00125 }
00126
00127 void Input_Output_Manager::write_out_basket(const set<itemtype>& basket)
00128 {
00129 for( set<itemtype>::const_iterator it_item = basket.begin();
00130 it_item != basket.end(); it_item++)
00131 {
00132 operator<<( new_code_inverse[*it_item] );
00133 put(' ');
00134 }
00135 }
00136
00137 void Input_Output_Manager::write_out_basket_and_counter(
00138 const set<itemtype>& itemset, const unsigned long counter)
00139 {
00140 for( set<itemtype>::const_iterator it_item = itemset.begin();
00141 it_item != itemset.end(); it_item++)
00142 {
00143 operator<<( new_code_inverse[*it_item] );
00144 put(' ');
00145 }
00146 put('(');
00147 operator<<(counter);
00148 write(")\n",2);
00149 }
00150
00151 void Input_Output_Manager::rewind()
00152 {
00153 basket_file.clear();
00154 basket_file.seekg(0, ios::beg);
00155 }
00156
00157 Input_Output_Manager::~Input_Output_Manager()
00158 {
00159 close();
00160 }