Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

trie/trie_manipulators/IntersectProPruner.hpp

Go to the documentation of this file.
00001 #ifndef IntersectProPruner_HPP
00002 #define IntersectProPruner_HPP
00003 
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008 //#include <cstdio>
00009 //#include <iterator>   //for test
00010 
00011 
00012 
00013 
00014 namespace Bodon
00015 {
00016    template <class DF_D, class TRIE, bool NEE>
00017    class IntersectProPruner : public ManipulatorBase<DF_D, TRIE>
00018    {
00019       protected:
00020          std::vector<Edge> extenders;
00021          mutable std::vector<item_t> ext_items;
00022          mutable std::vector<item_t> ext_nee;
00023          typedef ManipulatorBase<DF_D, TRIE> PARENT;
00024 
00025       public:
00026          IntersectProPruner( TRIE& main_trie, DF_D& df_decoder ) : 
00027             ManipulatorBase<DF_D, TRIE>(main_trie, df_decoder){}
00028 
00029       protected:
00030          
00031          void intersect( const TRIE* subset_trie ) const;
00032 
00033          void intersectNEE( const TRIE* subset_trie ) const;
00034 
00035          void filterNonExtenders( 
00036             const std::vector<const TRIE*>& subset_tries,
00037             const item_t leaf_item ) const;
00038 
00039          void filterNonExtendersNEE( 
00040             const std::vector<const TRIE*>& subset_tries,
00041             const item_t leaf_item ) const;
00042 
00043 
00044          bool findSubsetTries( 
00045             std::vector<item_t>& itemset, 
00046             std::vector<const TRIE*>& subset_trie) const;
00047 
00048          void generateCandidateAtParent(
00049             TRIE* trie, std::vector<item_t>& maybe_candidate);
00050          void generateCandidateAtParentNEE(
00051             TRIE* trie, std::vector<item_t>& maybe_candidate,
00052             std::vector<item_t>& NEEsum);
00053 
00054 
00055    };
00056 
00057    template <class DF_D, class TRIE, bool NEE> inline void 
00058    IntersectProPruner<DF_D, TRIE, NEE>::
00059    intersect( const TRIE* subset_trie) const
00060    {
00061       typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00062       std::vector<item_t>::iterator it_i(ext_items.begin());
00063       while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00064       {
00065          if(*it_e < *it_i)
00066             ++it_e;
00067          else if(*it_e > *it_i)
00068             it_i = ext_items.erase(it_i);
00069          else
00070          {
00071             ++it_e;
00072             ++it_i;
00073          }
00074       }
00075       ext_items.erase(it_i, ext_items.end());      
00076    }
00077 
00078    template <class DF_D, class TRIE, bool NEE> inline void 
00079    IntersectProPruner<DF_D, TRIE, NEE>::
00080    intersectNEE( const TRIE* subset_trie ) const
00081    {
00082       typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00083       std::vector<item_t>::const_iterator it_nee( subset_trie->neelist.begin() );
00084       std::vector<item_t>::iterator it_i(ext_items.begin());
00085       while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00086       {
00087          if(*it_e < *it_i)
00088             ++it_e;
00089          else if(*it_e > *it_i)
00090          {
00091             while(it_nee != subset_trie->neelist.end() && *it_nee < *it_i)
00092                ++it_nee;
00093             if(it_nee != subset_trie->neelist.end() && *it_nee == *it_i)
00094                ext_nee.push_back(*it_i);
00095             it_i = ext_items.erase(it_i);
00096          }
00097          else
00098          {
00099             ++it_e;
00100             ++it_i;
00101          }
00102       }
00103       if(it_i != ext_items.end())
00104       {
00105          std::vector<item_t>::iterator it_2(it_i);
00106          do
00107          {
00108             while(it_nee != subset_trie->neelist.end() && *it_nee < *it_i)
00109                ++it_nee;
00110             if(it_nee != subset_trie->neelist.end() && *it_nee == *it_i)
00111                ext_nee.push_back(*it_i);
00112             ++it_i;
00113          }
00114          while(it_i != ext_items.end());
00115          ext_items.erase(it_2, ext_items.end());
00116       }
00117    }
00118 
00119    template <class DF_D, class TRIE, bool NEE> inline void 
00120    IntersectProPruner<DF_D, TRIE, NEE>::
00121    filterNonExtenders( const std::vector<const TRIE*>& subset_tries,  
00122                        const item_t leaf_item ) const
00123    {
00124       std::vector<typename TRIE::const_iterator> subset_child_iters;
00125       subset_child_iters.reserve(subset_tries.size());
00126       for(typename std::vector<const TRIE*>::const_iterator it = subset_tries.begin(); 
00127           it !=  subset_tries.end(); ++it)
00128          subset_child_iters.push_back((*it)->edgelist.begin());
00129 
00130       for(std::vector<item_t>::size_type index = 0; 
00131           index < subset_tries.size(); ++index)
00132       {
00133          while( subset_child_iters[index] !=  subset_tries[index]->edgelist.end() &&
00134                 (*(subset_child_iters[index])).first < leaf_item)
00135             ++subset_child_iters[index];
00136          if( subset_child_iters[index] !=  subset_tries[index]->edgelist.end() &&
00137              (*(subset_child_iters[index])).first == leaf_item)
00138             intersect(static_cast<const TRIE*>((*(subset_child_iters[index])).second));
00139          else
00140          {
00141             ext_items.clear();
00142             break;
00143          }
00144       }
00145    }
00146 
00147 
00148    template <class DF_D, class TRIE, bool NEE> inline void 
00149    IntersectProPruner<DF_D, TRIE, NEE>::
00150    filterNonExtendersNEE( const std::vector<const TRIE*>& subset_tries,
00151                           const item_t leaf_item ) const
00152    {
00153       std::vector<typename TRIE::const_iterator> subset_child_iters; 
00154       std::vector<vector<item_t>::const_iterator> subset_nee_iters;
00155       subset_child_iters.reserve(subset_tries.size());
00156       subset_nee_iters.reserve(subset_tries.size());
00157       for(typename std::vector<const TRIE*>::const_iterator it = subset_tries.begin(); 
00158           it !=  subset_tries.end(); ++it)
00159       {
00160          subset_child_iters.push_back((*it)->edgelist.begin());
00161          subset_nee_iters.push_back((*it)->neelist.begin());
00162       }
00163 
00164       for(std::vector<item_t>::size_type index = 0; 
00165           index < subset_tries.size(); ++index)
00166       {
00167          while( subset_child_iters[index] !=  subset_tries[index]->edgelist.end() &&
00168                 (*(subset_child_iters[index])).first < leaf_item)
00169             ++subset_child_iters[index];
00170          if( subset_child_iters[index] !=  subset_tries[index]->edgelist.end() &&
00171              (*(subset_child_iters[index])).first == leaf_item)
00172             intersectNEE(static_cast<const TRIE*>((*(subset_child_iters[index])).second));
00173          else
00174          {
00175             while( subset_nee_iters[index] !=  subset_tries[index]->neelist.end() &&
00176                 *subset_nee_iters[index] < leaf_item)
00177                ++subset_nee_iters[index];
00178             if( subset_nee_iters[index] != subset_tries[index]->neelist.end() &&
00179                 *subset_nee_iters[index] == leaf_item)
00180                intersectNEE( subset_tries[index] );
00181             else
00182             {
00183                ext_items.clear();
00184                break;
00185             }
00186          }
00187       }
00188    }
00189    template <class DF_D, class TRIE, bool NEE> inline bool
00190    IntersectProPruner<DF_D, TRIE, NEE>::
00191    findSubsetTries( std::vector<item_t>& itemset, 
00192                     std::vector<const TRIE*>& subset_tries ) const
00193    {
00194       assert(itemset.size() > 0);
00195       std::vector<item_t>::iterator item_it = itemset.end()-1;
00196       register item_t deleted_item = *item_it, temp_item;
00197       const TRIE* a_subset_trie;
00198 
00199       itemset.pop_back();
00200       a_subset_trie = PARENT::main_trie.isIncluded( itemset, itemset.begin() );
00201       if(a_subset_trie)
00202          subset_tries.push_back(a_subset_trie);
00203       else 
00204       {
00205          itemset.push_back(deleted_item);
00206          return false;
00207       }
00208       while(item_it != itemset.begin() )
00209       {
00210          --item_it;
00211          temp_item = *item_it;
00212          *item_it = deleted_item;
00213          deleted_item = temp_item;
00214          a_subset_trie = PARENT::main_trie.isIncluded( itemset, itemset.begin() );
00215          if(a_subset_trie)
00216             subset_tries.push_back(a_subset_trie);
00217          else
00218          {
00219             itemset.insert( item_it, deleted_item );
00220             return false;
00221          }
00222       }
00223       itemset.insert( item_it, deleted_item );
00224       return true;
00225    }
00226 
00227    template <class DF_D, class TRIE, bool NEE> inline void 
00228    IntersectProPruner<DF_D, TRIE, NEE>::
00229    generateCandidateAtParent(TRIE* trie, std::vector<item_t>& maybe_candidate)
00230    {
00231       std::vector<const TRIE*> subset_tries;
00232       const bool all_subset_included = findSubsetTries( 
00233          maybe_candidate, subset_tries);
00234       typename TRIE::iterator itEdge( trie->edgelist.begin() ), itEdge2;
00235       TRIE* toExtend;
00236       while( itEdge != trie->edgelist.end() )
00237       {
00238          toExtend = static_cast<TRIE*>((*itEdge).second);
00239          PARENT::df_decoder.pushItemWithWrite( (*itEdge).first, toExtend->getCounter() );
00240          PARENT::df_decoder.popItem();
00241          if(all_subset_included)
00242          {
00243             itEdge2 = itEdge;
00244             ++itEdge2;
00245             ext_items.clear();
00246             for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00247                ext_items.push_back((*itEdge2).first);
00248             filterNonExtenders( subset_tries, (*itEdge).first );
00249             extenders.clear();
00250             for(std::vector<item_t>::iterator it = ext_items.begin();
00251                 it != ext_items.end(); ++it)
00252                extenders.push_back(Edge(*it, new TRIE(0)));
00253             if(extenders.empty())
00254             {
00255                delete toExtend;
00256                itEdge = trie->edgelist.erase(itEdge);
00257             }
00258             else 
00259             {
00260                toExtend->edgelist.insert(extenders);
00261                ++itEdge;
00262             }
00263          }
00264          else
00265          {
00266             delete toExtend;
00267             itEdge = trie->edgelist.erase(itEdge);
00268          }
00269       }
00270    }
00271    template <class DF_D, class TRIE, bool NEE> inline void 
00272    IntersectProPruner<DF_D, TRIE, NEE>::
00273    generateCandidateAtParentNEE(TRIE* trie, std::vector<item_t>& maybe_candidate,
00274                                 std::vector<item_t>& NEEsum)
00275    {
00276       std::vector<const TRIE*> subset_tries;
00277       NEEsum.insert( NEEsum.end(), 
00278                      trie->neelist.begin(), trie->neelist.end() );
00279 
00280       if( findSubsetTries( maybe_candidate, subset_tries ) )
00281       {
00282          typename TRIE::iterator itEdge( trie->edgelist.begin() ), itEdge2;
00283          TRIE* toExtend;
00284          while( itEdge != trie->edgelist.end() )
00285          {
00286             toExtend = static_cast<TRIE*>((*itEdge).second);
00287             itEdge2 = itEdge;
00288             ++itEdge2;
00289             ext_items.clear();
00290             ext_nee.clear();
00291             for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00292                ext_items.push_back((*itEdge2).first);
00293             filterNonExtendersNEE( subset_tries, (*itEdge).first );
00294             extenders.clear();
00295             for(std::vector<item_t>::iterator it = ext_items.begin();
00296                 it != ext_items.end(); ++it)
00297                extenders.push_back(Edge(*it, new TRIE(0)));
00298             if(extenders.empty() && ext_nee.empty() && NEEsum.size() < 1)
00299             {
00300                PARENT::df_decoder.pushItemWithWrite( 
00301                   (*itEdge).first,
00302                   static_cast<TRIE*>(toExtend)->getCounter() );
00303                PARENT::df_decoder.popItem();
00304                delete toExtend;
00305                itEdge = trie->edgelist.erase(itEdge);
00306             }
00307             else
00308             {
00309                toExtend->edgelist.insert(extenders);
00310                toExtend->neelist.reserve(ext_nee.size());
00311                toExtend->neelist.insert( toExtend->neelist.end(), 
00312                                          ext_nee.begin(), ext_nee.end() );
00313                ++itEdge;
00314             }
00315          }
00316       }
00317       else if(NEEsum.size() < 1)
00318       {
00319          typename TRIE::iterator itEdge( trie->edgelist.begin() );
00320          while( itEdge != trie->edgelist.end() )
00321          {
00322             PARENT::df_decoder.pushItemWithWrite( 
00323                (*itEdge).first,
00324                static_cast<TRIE*>((*itEdge).second)->getCounter() );
00325             PARENT::df_decoder.popItem();
00326             delete static_cast<TRIE*>((*itEdge).second);
00327             itEdge = trie->edgelist.erase(itEdge);
00328          }
00329       }
00330       NEEsum.resize( NEEsum.size() - trie->neelist.size() );
00331    }
00332 }
00333 #endif

Generated on Sun Sep 17 17:50:39 2006 for FIM environment by  doxygen 1.4.4