Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

inhomogeneous_trie/trie_manipulators/IntersectProPruner.hpp

Go to the documentation of this file.
00001 #ifndef IntersectProPruner_HPP
00002 #define IntersectProPruner_HPP
00003 
00004 #include "common.hpp"
00005 #include "common/Edge.hpp"
00006 #include "apriori/bodon/inhomogeneous_trie/trie_manipulators/ManipulatorBase.hpp"
00007 #include <vector>
00008 //#include <cstdio>
00009 //#include <iterator>   //for test
00010 
00011 
00012 
00013 
00014 namespace Bodon
00015 {
00016 namespace inhomogeneous_trie
00017 {
00018    template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, 
00019              bool NEE, bool DEADENDPRUNE = true>
00020    class IntersectProPruner : public ManipulatorBase<DF_D, TRIE, LEAF_ALLOCATOR>
00021    {
00022       protected:
00023          std::vector<Edge> extenders;
00024          mutable std::vector<item_t> ext_items;
00025          mutable std::vector<item_t> leaf_neelist;
00026          std::vector<Edge> replace_list;
00027          bool is_there_any_new_candidate;
00028          typedef ManipulatorBase<DF_D, TRIE, LEAF_ALLOCATOR> PARENT;
00029       public:
00030          IntersectProPruner( TRIE& main_trie, DF_D& df_decoder,
00031                              LEAF_ALLOCATOR& s_alloc ) : 
00032             PARENT(main_trie, df_decoder, s_alloc){}
00033 
00034       protected:
00035          
00036          void intersect( const TRIE* subset_trie ) const;
00037 
00038          void intersectNEE( const TRIE* subset_trie ) const;
00039 
00040          void filterNonExtenders( 
00041             const std::vector<const TRIE*>& subset_tries,
00042             const item_t leaf_item ) const;
00043 
00044          void filterNonExtendersNEE( 
00045             const std::vector<const TRIE*>& subset_tries,
00046             const item_t leaf_item ) const;
00047 
00048 
00049          bool findSubsetTries( 
00050             std::vector<item_t>& itemset, 
00051             std::vector<const TRIE*>& subset_trie) const;
00052 
00053          void generateCandidateAtParent(
00054             TRIE* trie, std::vector<item_t>& maybe_candidate);
00055          void generateCandidateAtParentNEE(
00056             TRIE* trie, std::vector<item_t>& maybe_candidate,
00057             std::vector<item_t>& NEEsum);
00058          void generateCandidateAtParentNEENoDeadend(
00059             TRIE* trie, std::vector<item_t>& maybe_candidate);
00060 
00061 
00062    };
00063 
00064    template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, 
00065              bool NEE, bool DEADENDPRUNE> inline void 
00066    IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00067    intersect( const TRIE* subset_trie) const
00068    {
00069       typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00070       std::vector<item_t>::iterator it_i(ext_items.begin());
00071       while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00072       {
00073          if(*it_e < *it_i)
00074             ++it_e;
00075          else if(*it_e > *it_i)
00076             it_i = ext_items.erase(it_i);
00077          else
00078          {
00079             ++it_e;
00080             ++it_i;
00081          }
00082       }
00083       ext_items.erase(it_i, ext_items.end());      
00084    }
00085 
00086    template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, 
00087              bool NEE, bool DEADENDPRUNE> inline void 
00088    IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00089    intersectNEE( const TRIE* subset_trie ) const
00090    {
00091       typename TRIE::const_iterator it_e( subset_trie->edgelist.begin() );
00092       std::vector<item_t>::const_iterator it_nee( subset_trie->neelist.begin() );
00093       std::vector<item_t>::iterator it_i(ext_items.begin());
00094       while(it_e != subset_trie->edgelist.end() && it_i != ext_items.end())
00095       {
00096          if(*it_e < *it_i)
00097             ++it_e;
00098          else if(*it_e > *it_i)
00099          {
00100             while(it_nee != subset_trie->neelist.end() && *it_nee < *it_i)
00101                ++it_nee;
00102             if(it_nee != subset_trie->neelist.end() && *it_nee == *it_i)
00103                leaf_neelist.push_back(*it_i);
00104             it_i = ext_items.erase(it_i);
00105          }
00106          else
00107          {
00108             ++it_e;
00109             ++it_i;
00110          }
00111       }
00112       if(it_i != ext_items.end())
00113       {
00114          std::vector<item_t>::iterator it_2(it_i);
00115          do
00116          {
00117             while(it_nee != subset_trie->neelist.end() && *it_nee < *it_i)
00118                ++it_nee;
00119             if(it_nee != subset_trie->neelist.end() && *it_nee == *it_i)
00120                leaf_neelist.push_back(*it_i);
00121             ++it_i;
00122          }
00123          while(it_i != ext_items.end());
00124          ext_items.erase(it_2, ext_items.end());
00125       }
00126    }
00127 
00128    template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, 
00129              bool NEE, bool DEADENDPRUNE> inline void 
00130    IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00131    filterNonExtenders( const std::vector<const TRIE*>& subset_tries,  
00132                        const item_t leaf_item ) const
00133    {
00134       std::vector<typename TRIE::const_iterator> subset_child_iters;
00135       subset_child_iters.reserve(subset_tries.size());
00136       for(typename std::vector<const TRIE*>::const_iterator it = subset_tries.begin(); 
00137           it !=  subset_tries.end(); ++it)
00138          subset_child_iters.push_back((*it)->edgelist.begin());
00139 
00140       for(std::vector<item_t>::size_type index = 0; 
00141           index < subset_tries.size(); ++index)
00142       {
00143          while( subset_child_iters[index] !=  subset_tries[index]->edgelist.end() &&
00144                 (*(subset_child_iters[index])).first < leaf_item)
00145             ++subset_child_iters[index];
00146          if( subset_child_iters[index] !=  subset_tries[index]->edgelist.end() &&
00147              (*(subset_child_iters[index])).first == leaf_item)
00148             intersect(static_cast<const TRIE*>((*(subset_child_iters[index])).second));
00149          else
00150          {
00151             ext_items.clear();
00152             break;
00153          }
00154       }
00155    }
00156 
00157 
00158    template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, 
00159              bool NEE, bool DEADENDPRUNE> inline void 
00160    IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00161    filterNonExtendersNEE( const std::vector<const TRIE*>& subset_tries,
00162                           const item_t leaf_item ) const
00163    {
00164       std::vector<typename TRIE::const_iterator> subset_child_iters; 
00165       std::vector<vector<item_t>::const_iterator> subset_nee_iters;
00166       subset_child_iters.reserve(subset_tries.size());
00167       subset_nee_iters.reserve(subset_tries.size());
00168       for(typename std::vector<const TRIE*>::const_iterator it = subset_tries.begin(); 
00169           it !=  subset_tries.end(); ++it)
00170       {
00171          subset_child_iters.push_back((*it)->edgelist.begin());
00172          subset_nee_iters.push_back((*it)->neelist.begin());
00173       }
00174 
00175       for(std::vector<item_t>::size_type index = 0; 
00176           index < subset_tries.size(); ++index)
00177       {
00178          while( subset_child_iters[index] !=  subset_tries[index]->edgelist.end() &&
00179                 (*(subset_child_iters[index])).first < leaf_item)
00180             ++subset_child_iters[index];
00181          if( subset_child_iters[index] !=  subset_tries[index]->edgelist.end() &&
00182              (*(subset_child_iters[index])).first == leaf_item)
00183             intersectNEE(static_cast<const TRIE*>((*(subset_child_iters[index])).second));
00184          else
00185          {
00186             while( subset_nee_iters[index] !=  subset_tries[index]->neelist.end() &&
00187                 *subset_nee_iters[index] < leaf_item)
00188                ++subset_nee_iters[index];
00189             if( subset_nee_iters[index] != subset_tries[index]->neelist.end() &&
00190                 *subset_nee_iters[index] == leaf_item)
00191                intersectNEE( subset_tries[index] );
00192             else
00193             {
00194                ext_items.clear();
00195                break;
00196             }
00197          }
00198       }
00199    }
00200    template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, 
00201              bool NEE, bool DEADENDPRUNE> inline bool
00202    IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00203    findSubsetTries( std::vector<item_t>& itemset, 
00204                     std::vector<const TRIE*>& subset_tries ) const
00205    {
00206       assert(!itemset.empty());
00207       std::vector<item_t>::iterator item_it = itemset.end()-1;
00208       register item_t deleted_item = *item_it, temp_item;
00209       const TRIE* a_subset_trie;
00210 
00211       itemset.pop_back();
00212       a_subset_trie = this->main_trie.isIncluded( itemset, itemset.begin() );
00213       if(a_subset_trie)
00214          subset_tries.push_back(a_subset_trie);
00215       else 
00216       {
00217          itemset.push_back(deleted_item);
00218          return false;
00219       }
00220       while(item_it != itemset.begin() )
00221       {
00222          --item_it;
00223          temp_item = *item_it;
00224          *item_it = deleted_item;
00225          deleted_item = temp_item;
00226          a_subset_trie = this->main_trie.isIncluded( itemset, itemset.begin() );
00227          if(a_subset_trie)
00228             subset_tries.push_back(a_subset_trie);
00229          else
00230          {
00231             itemset.insert( item_it, deleted_item );
00232             return false;
00233          }
00234       }
00235       itemset.insert( item_it, deleted_item );
00236       return true;
00237    }
00238 
00239    template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, 
00240              bool NEE, bool DEADENDPRUNE> inline void 
00241    IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00242    generateCandidateAtParent(TRIE* trie, std::vector<item_t>& maybe_candidate)
00243    {
00244       std::vector<const TRIE*> subset_tries;
00245       const bool all_subset_included = findSubsetTries( 
00246          maybe_candidate, subset_tries);
00247       typename TRIE::iterator itEdge2;
00248       LEAF* toExtendAsLeaf;
00249       TRIE* toExtend;
00250       replace_list.clear();
00251       for( typename TRIE::iterator itEdge( trie->edgelist.begin() ); 
00252            itEdge != trie->edgelist.end(); ++itEdge )
00253       {
00254          toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00255          PARENT::df_decoder.pushItemWithWrite( (*itEdge).first, 
00256                                                toExtendAsLeaf->getCounter() );
00257          PARENT::df_decoder.popItem();
00258          if(all_subset_included)
00259          {
00260             itEdge2 = itEdge;
00261             ++itEdge2;
00262             ext_items.clear();
00263             for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00264                ext_items.push_back((*itEdge2).first);
00265             filterNonExtenders( subset_tries, (*itEdge).first );
00266             extenders.clear();
00267             for(std::vector<item_t>::iterator it = ext_items.begin();
00268                 it != ext_items.end(); ++it)
00269             {
00270                extenders.push_back(Edge(*it, PARENT::s_alloc.allocate()));
00271                static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00272             }
00273             if(!extenders.empty())
00274             {
00275                toExtend = new TRIE(toExtendAsLeaf->getCounter());
00276                toExtend->edgelist.insert(extenders);
00277                replace_list.push_back(Edge((*itEdge).first, toExtend));
00278                if( !DEADENDPRUNE)
00279                   is_there_any_new_candidate = true;
00280             }
00281          }
00282          PARENT::s_alloc.free(toExtendAsLeaf);
00283       }
00284       trie->edgelist.clear();
00285       trie->edgelist.insert(replace_list);
00286    }
00287    template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, 
00288              bool NEE, bool DEADENDPRUNE> inline void 
00289    IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00290    generateCandidateAtParentNEE(TRIE* trie, std::vector<item_t>& maybe_candidate,
00291                                 std::vector<item_t>& NEEsum)
00292    {
00293       std::vector<const TRIE*> subset_tries;
00294       NEEsum.insert( NEEsum.end(), 
00295                      trie->neelist.begin(), trie->neelist.end() );
00296       replace_list.clear();
00297       if( findSubsetTries( maybe_candidate, subset_tries ) )
00298       {
00299          typename TRIE::iterator itEdge2;
00300          LEAF* toExtendAsLeaf;
00301          TRIE* toExtend;
00302          for( typename TRIE::iterator itEdge( trie->edgelist.begin() ); 
00303               itEdge != trie->edgelist.end(); ++itEdge )
00304          {
00305             toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00306             itEdge2 = itEdge;
00307             ++itEdge2;
00308             ext_items.clear();
00309             leaf_neelist.clear();
00310             for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00311                ext_items.push_back((*itEdge2).first);
00312             filterNonExtendersNEE( subset_tries, (*itEdge).first );
00313             extenders.clear();
00314             for(std::vector<item_t>::iterator it = ext_items.begin();
00315                 it != ext_items.end(); ++it)
00316             {
00317                extenders.push_back(Edge(*it, PARENT::s_alloc.allocate()));
00318                static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00319             }
00320             if(DEADENDPRUNE && extenders.empty() && leaf_neelist.empty() && NEEsum.empty() )
00321             {
00322                PARENT::df_decoder.pushItemWithWrite( (*itEdge).first,
00323                                                      toExtendAsLeaf->getCounter() );
00324                PARENT::df_decoder.popItem();
00325             }
00326             else
00327             {
00328                toExtend = new TRIE(toExtendAsLeaf->getCounter());
00329                toExtend->neePushBackSorted( leaf_neelist );
00330                replace_list.push_back(Edge((*itEdge).first, toExtend));
00331                toExtend->edgelist.insert(extenders);
00332                if( !DEADENDPRUNE)
00333                   is_there_any_new_candidate = true;
00334             }
00335             PARENT::s_alloc.free(toExtendAsLeaf);
00336          }
00337          trie->edgelist.clear();
00338          trie->edgelist.insert(replace_list);
00339       }
00340       else if(DEADENDPRUNE && NEEsum.empty())
00341       {
00342          for( typename TRIE::iterator itEdge( trie->edgelist.begin() ); 
00343               itEdge != trie->edgelist.end(); ++itEdge )
00344          {
00345             PARENT::df_decoder.pushItemWithWrite( 
00346                (*itEdge).first, 
00347                static_cast<LEAF*>((*itEdge).second)->getCounter() );
00348             PARENT::df_decoder.popItem();
00349             PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00350          }
00351          trie->edgelist.clear();
00352       }
00353       else
00354       {
00355          TRIE* toExtend;
00356          for( typename TRIE::iterator itEdge( trie->edgelist.begin() ); 
00357            itEdge != trie->edgelist.end(); ++itEdge )
00358          {
00359             toExtend = new TRIE( 
00360                static_cast<LEAF*>((*itEdge).second)->getCounter());
00361             PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00362             replace_list.push_back(Edge((*itEdge).first, toExtend));
00363          }
00364          trie->edgelist.clear();
00365          trie->edgelist.insert(replace_list);
00366       }
00367       NEEsum.resize( NEEsum.size() - trie->neelist.size() );
00368    }
00369    template <class DF_D, class TRIE, class LEAF, class LEAF_ALLOCATOR, 
00370              bool NEE, bool DEADENDPRUNE> inline void 
00371    IntersectProPruner<DF_D, TRIE, LEAF, LEAF_ALLOCATOR, NEE, DEADENDPRUNE>::
00372    generateCandidateAtParentNEENoDeadend(TRIE* trie, std::vector<item_t>& maybe_candidate)
00373    {
00374       std::vector<const TRIE*> subset_tries;
00375       replace_list.clear();
00376       if( findSubsetTries( maybe_candidate, subset_tries ) )
00377       {
00378          typename TRIE::iterator itEdge2;
00379          LEAF* toExtendAsLeaf;
00380          TRIE* toExtend;
00381          for( typename TRIE::iterator itEdge( trie->edgelist.begin() ); 
00382               itEdge != trie->edgelist.end(); ++itEdge )
00383          {
00384             toExtendAsLeaf = static_cast<LEAF*>((*itEdge).second);
00385             itEdge2 = itEdge;
00386             ++itEdge2;
00387             ext_items.clear();
00388             leaf_neelist.clear();
00389             for( ; itEdge2 != trie->edgelist.end(); ++itEdge2 )
00390                ext_items.push_back((*itEdge2).first);
00391             filterNonExtendersNEE( subset_tries, (*itEdge).first );
00392             extenders.clear();
00393             for(std::vector<item_t>::iterator it = ext_items.begin();
00394                 it != ext_items.end(); ++it)
00395             {
00396                extenders.push_back(Edge(*it, PARENT::s_alloc.allocate()));
00397                static_cast<LEAF*>(extenders.back().second)->setCounter(0);
00398             }
00399             toExtend = new TRIE(toExtendAsLeaf->getCounter());
00400             toExtend->neePushBackSorted( leaf_neelist );
00401             replace_list.push_back(Edge((*itEdge).first, toExtend));
00402             toExtend->edgelist.insert(extenders);
00403             is_there_any_new_candidate = true;
00404             PARENT::s_alloc.free(toExtendAsLeaf);
00405          }
00406          trie->edgelist.clear();
00407          trie->edgelist.insert(replace_list);
00408       }
00409       else 
00410       {
00411          TRIE* toExtend;
00412          for( typename TRIE::iterator itEdge( trie->edgelist.begin() ); 
00413            itEdge != trie->edgelist.end(); ++itEdge )
00414          {
00415             toExtend = new TRIE( 
00416                static_cast<LEAF*>((*itEdge).second)->getCounter());
00417             PARENT::s_alloc.free(static_cast<LEAF*>((*itEdge).second));
00418             replace_list.push_back(Edge((*itEdge).first, toExtend));
00419          }
00420          trie->edgelist.clear();
00421          trie->edgelist.insert(replace_list);
00422       }
00423    }
00424 }
00425 }
00426 #endif

Generated on Sun Sep 17 17:50:39 2006 for FIM environment by  doxygen 1.4.4