collection.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// collection.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to store a collection of sets with elements of type T.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_PDT_COLLECTION_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_PDT_COLLECTION_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/bi-table.h>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Stores a collection of non-empty sets with elements of type T. A
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// default constructor, equality ==, a total order <, and an STL-style
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// hash class must be defined on the elements. Provides signed
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// integer ID (of type I) of each unique set. The IDs are allocated
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// starting from 0 in order.
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T>
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass Collection {
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  struct Node {  // Trie node
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    I node_id;   // Root is kNoNodeId;
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    T element;
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Node() : node_id(kNoNodeId), element(T()) {}
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Node(I i, const T &t) : node_id(i), element(t) {}
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool operator==(const Node& n) const {
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return n.node_id == node_id && n.element == element;
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  struct NodeHash {
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t operator()(const Node &n) const {
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return n.node_id + hash_(n.element) * kPrime;
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef CompactHashBiTable<I, Node, NodeHash> NodeTable;
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class SetIterator {
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetIterator(I id, Node node, NodeTable *node_table)
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        :id_(id), node_(node), node_table_(node_table) {}
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool Done() const { return id_ == kNoNodeId; }
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const T &Element() const { return node_.element; }
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    void Next() {
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      id_ = node_.node_id;
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (id_ != kNoNodeId)
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        node_ = node_table_->FindEntry(id_);
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   private:
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    I id_;                     // Iterator set node id
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Node node_;                // Iterator set node
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    NodeTable *node_table_;
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Collection() {}
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Lookups integer ID from set. If it doesn't exist, then adds it.
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Set elements should be in strict order (and therefore unique).
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  I FindId(const vector<T> &set) {
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    I node_id = kNoNodeId;
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (ssize_t i = set.size() - 1; i >= 0; --i) {
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Node node(node_id, set[i]);
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      node_id = node_table_.FindId(node);
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return node_id;
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Finds set given integer ID. Returns true if ID corresponds
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // to set. Use iterators below to traverse result.
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SetIterator FindSet(I id) {
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (id < 0 && id >= node_table_.Size()) {
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return SetIterator(kNoNodeId, Node(kNoNodeId, T()), &node_table_);
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return SetIterator(id, node_table_.FindEntry(id), &node_table_);
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static const I kNoNodeId;
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static const size_t kPrime;
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static std::tr1::hash<T> hash_;
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  NodeTable node_table_;
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(Collection);
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class I, class T> const I Collection<I, T>::kNoNodeId = -1;
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T> const size_t Collection<I, T>::kPrime = 7853;
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T> std::tr1::hash<T> Collection<I, T>::hash_;
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_EXTENSIONS_PDT_COLLECTION_H__
123