collection.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// collection.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to store a collection of sets with elements of type T. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_PDT_COLLECTION_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_PDT_COLLECTION_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/bi-table.h> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Stores a collection of non-empty sets with elements of type T. A 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// default constructor, equality ==, a total order <, and an STL-style 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// hash class must be defined on the elements. Provides signed 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// integer ID (of type I) of each unique set. The IDs are allocated 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// starting from 0 in order. 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T> 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass Collection { 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson struct Node { // Trie node 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I node_id; // Root is kNoNodeId; 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T element; 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Node() : node_id(kNoNodeId), element(T()) {} 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Node(I i, const T &t) : node_id(i), element(t) {} 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator==(const Node& n) const { 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return n.node_id == node_id && n.element == element; 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson struct NodeHash { 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t operator()(const Node &n) const { 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return n.node_id + hash_(n.element) * kPrime; 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef CompactHashBiTable<I, Node, NodeHash> NodeTable; 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class SetIterator { 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetIterator(I id, Node node, NodeTable *node_table) 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson :id_(id), node_(node), node_table_(node_table) {} 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Done() const { return id_ == kNoNodeId; } 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T &Element() const { return node_.element; } 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Next() { 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson id_ = node_.node_id; 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (id_ != kNoNodeId) 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson node_ = node_table_->FindEntry(id_); 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I id_; // Iterator set node id 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Node node_; // Iterator set node 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NodeTable *node_table_; 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Collection() {} 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Lookups integer ID from set. If it doesn't exist, then adds it. 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set elements should be in strict order (and therefore unique). 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I FindId(const vector<T> &set) { 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I node_id = kNoNodeId; 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ssize_t i = set.size() - 1; i >= 0; --i) { 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Node node(node_id, set[i]); 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson node_id = node_table_.FindId(node); 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return node_id; 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Finds set given integer ID. Returns true if ID corresponds 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // to set. Use iterators below to traverse result. 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetIterator FindSet(I id) { 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (id < 0 && id >= node_table_.Size()) { 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return SetIterator(kNoNodeId, Node(kNoNodeId, T()), &node_table_); 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return SetIterator(id, node_table_.FindEntry(id), &node_table_); 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const I kNoNodeId; 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const size_t kPrime; 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static std::tr1::hash<T> hash_; 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NodeTable node_table_; 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(Collection); 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class I, class T> const I Collection<I, T>::kNoNodeId = -1; 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T> const size_t Collection<I, T>::kPrime = 7853; 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T> std::tr1::hash<T> Collection<I, T>::hash_; 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_EXTENSIONS_PDT_COLLECTION_H__ 123