Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

SeqFrequentFilter.cpp

Go to the documentation of this file.
00001 #ifndef SeqFrequentFilter_CPP
00002 #define SeqFrequentFilter_CPP
00003 
00008 #include "common.hpp"  
00009 #include "common/log.h"  
00010 #include <vector>
00011 
00012 
00018 template <class IT_R>
00019 class SeqFrequentFilter
00020 {
00021    public:
00022       SeqFrequentFilter<IT_R>(IT_R& it_r):it_r(it_r){}
00023 
00029       void findFrequentItems(
00030          std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00031          counter_t& nr_of_transactions, counter_t& min_supp, 
00032          bool relative=false, double relminsupp=0.0);
00033    private:
00034       IT_R& it_r;
00035 
00036 
00037 };
00038 
00047 template <class IT_R> void SeqFrequentFilter<IT_R>::findFrequentItems(
00048    std::vector< std::pair<counter_t, item_t> >& freq_items_with_counters,
00049    counter_t& nr_of_transactions, counter_t& min_supp, 
00050    bool relative, double relminsupp)
00051 {
00052    freq_items_with_counters.clear();
00053    nr_of_transactions = 0;
00054    it_r.rewind();
00055 
00056    std::vector<item_t> transaction;
00057    std::vector<bool> pattern;
00058    std::vector< counter_t > temp_counter_vector;
00059 
00061    std::vector<item_t>::iterator it_transaction;
00062    item_t nr_of_items = 0;
00063    item_t sum_of_lengths = 0;
00064    while( it_r.nextTransactionBIS( transaction ) )
00065    {      
00066       if( !transaction.empty() )
00067       {
00068          nr_of_transactions++;
00069          for( it_transaction = transaction.begin(); 
00070               it_transaction != transaction.end(); ++it_transaction )
00071          {
00072             if( *it_transaction + 1  > temp_counter_vector.size() )
00073             {
00074                temp_counter_vector.resize( *it_transaction + 1, 0 );
00075                pattern.resize( *it_transaction + 1, 1 );
00076             }
00077             if(pattern[*it_transaction])
00078             {
00079                ++temp_counter_vector[*it_transaction];
00080                pattern[*it_transaction] = false;
00081             }
00082          }
00083          for( it_transaction = transaction.begin(); 
00084               it_transaction != transaction.end(); ++it_transaction )
00085                pattern[*it_transaction] = true;
00086 #if DEBUG_LEVEL >= LEVEL_DBG
00087          sum_of_lengths += transaction.size();
00088 #endif
00089       }
00090    }
00091    it_r.setLargestItem( temp_counter_vector.size() - 1 );
00092    log_info(0,"Largest itemcode: %d", temp_counter_vector.size() - 1);
00093       
00094    if(relative)
00095       min_supp = static_cast<unsigned int>(relminsupp * nr_of_transactions);
00097    for( std::vector< counter_t >::size_type index = 0; 
00098         index < temp_counter_vector.size(); ++index )
00099    {
00100       if(temp_counter_vector[index] >= min_supp)
00101       {
00102          std::pair<counter_t, item_t> temp_pair(
00103             temp_counter_vector[index],index);
00104          freq_items_with_counters.push_back(temp_pair);
00105       }
00106 #if DEBUG_LEVEL >= LEVEL_DBG
00107       if(temp_counter_vector[index] )
00108          ++nr_of_items;
00109 #endif
00110    }
00111    log_dbg(0,"Number of items: %d", nr_of_items);
00112    log_dbg(0,"Average of the transactions' sizes: %d", 
00113            sum_of_lengths / nr_of_transactions);
00114 }
00115 
00116 #endif

Generated on Sun Sep 17 17:50:39 2006 for FIM environment by  doxygen 1.4.4