00001 #ifndef SeqFrequentFilter_CPP
00002 #define SeqFrequentFilter_CPP
00003
00008 #include "common.hpp"
00009 #include "common/log.h"
00010 #include <vector>
00011
00012
00018 template <class IT_R>
00019 class SeqFrequentFilter
00020 {
00021 public:
00022 SeqFrequentFilter<IT_R>(IT_R& it_r):it_r(it_r){}
00023
00029 void findFrequentItems(
00030 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00031 counter_t& nr_of_transactions, counter_t& min_supp,
00032 bool relative=false, double relminsupp=0.0);
00033 private:
00034 IT_R& it_r;
00035
00036
00037 };
00038
00047 template <class IT_R> void SeqFrequentFilter<IT_R>::findFrequentItems(
00048 std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00049 counter_t& nr_of_transactions, counter_t& min_supp,
00050 bool relative, double relminsupp)
00051 {
00052 freq_items_with_counters.clear();
00053 nr_of_transactions = 0;
00054 it_r.rewind();
00055
00056 std::vector<item_t> transaction;
00057 std::vector<bool> pattern;
00058 std::vector< counter_t > temp_counter_vector;
00059
00061 std::vector<item_t>::iterator it_transaction;
00062 item_t nr_of_items = 0;
00063 item_t sum_of_lengths = 0;
00064 while( it_r.nextTransactionBIS( transaction ) )
00065 {
00066 if( !transaction.empty() )
00067 {
00068 nr_of_transactions++;
00069 for( it_transaction = transaction.begin();
00070 it_transaction != transaction.end(); ++it_transaction )
00071 {
00072 if( *it_transaction + 1 > temp_counter_vector.size() )
00073 {
00074 temp_counter_vector.resize( *it_transaction + 1, 0 );
00075 pattern.resize( *it_transaction + 1, 1 );
00076 }
00077 if(pattern[*it_transaction])
00078 {
00079 ++temp_counter_vector[*it_transaction];
00080 pattern[*it_transaction] = false;
00081 }
00082 }
00083 for( it_transaction = transaction.begin();
00084 it_transaction != transaction.end(); ++it_transaction )
00085 pattern[*it_transaction] = true;
00086 #if DEBUG_LEVEL >= LEVEL_DBG
00087 sum_of_lengths += transaction.size();
00088 #endif
00089 }
00090 }
00091 it_r.setLargestItem( temp_counter_vector.size() - 1 );
00092 log_info(0,"Largest itemcode: %d", temp_counter_vector.size() - 1);
00093
00094 if(relative)
00095 min_supp = static_cast<unsigned int>(relminsupp * nr_of_transactions);
00097 for( std::vector< counter_t >::size_type index = 0;
00098 index < temp_counter_vector.size(); ++index )
00099 {
00100 if(temp_counter_vector[index] >= min_supp)
00101 {
00102 std::pair<counter_t, item_t> temp_pair(
00103 temp_counter_vector[index],index);
00104 freq_items_with_counters.push_back(temp_pair);
00105 }
00106 #if DEBUG_LEVEL >= LEVEL_DBG
00107 if(temp_counter_vector[index] )
00108 ++nr_of_items;
00109 #endif
00110 }
00111 log_dbg(0,"Number of items: %d", nr_of_items);
00112 log_dbg(0,"Average of the transactions' sizes: %d",
00113 sum_of_lengths / nr_of_transactions);
00114 }
00115
00116 #endif