1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// collection.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// Class to store a collection of ordered (multi-)sets with elements of type T. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_PDT_COLLECTION_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_PDT_COLLECTION_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/bi-table.h> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 32dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// Stores a collection of non-empty, ordered (multi-)sets with elements 33dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// of type T. A default constructor, equality ==, and an STL-style 34dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// hash class must be defined on the elements. Provides signed integer 35dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// ID (of type I) of each unique set. The IDs are allocated starting 36dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// from 0 in order. 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T> 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass Collection { 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson struct Node { // Trie node 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I node_id; // Root is kNoNodeId; 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T element; 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Node() : node_id(kNoNodeId), element(T()) {} 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Node(I i, const T &t) : node_id(i), element(t) {} 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator==(const Node& n) const { 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return n.node_id == node_id && n.element == element; 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson struct NodeHash { 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t operator()(const Node &n) const { 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return n.node_id + hash_(n.element) * kPrime; 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef CompactHashBiTable<I, Node, NodeHash> NodeTable; 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class SetIterator { 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetIterator(I id, Node node, NodeTable *node_table) 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson :id_(id), node_(node), node_table_(node_table) {} 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Done() const { return id_ == kNoNodeId; } 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T &Element() const { return node_.element; } 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Next() { 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson id_ = node_.node_id; 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (id_ != kNoNodeId) 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson node_ = node_table_->FindEntry(id_); 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I id_; // Iterator set node id 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Node node_; // Iterator set node 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NodeTable *node_table_; 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Collection() {} 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 83dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin // Lookups integer ID from ordered multi-set. If it doesn't exist 84dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin // and 'insert' is true, then adds it. Otherwise returns -1. 85dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin I FindId(const vector<T> &set, bool insert = true) { 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I node_id = kNoNodeId; 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ssize_t i = set.size() - 1; i >= 0; --i) { 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Node node(node_id, set[i]); 89dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin node_id = node_table_.FindId(node, insert); 90dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (node_id == -1) break; 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return node_id; 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 95dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin // Finds ordered (multi-)set given integer ID. Returns set iterator 96dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin // to traverse result. 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetIterator FindSet(I id) { 98dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (id < 0 || id >= node_table_.Size()) { 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return SetIterator(kNoNodeId, Node(kNoNodeId, T()), &node_table_); 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return SetIterator(id, node_table_.FindEntry(id), &node_table_); 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 105dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin I Size() const { return node_table_.Size(); } 106dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const I kNoNodeId; 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const size_t kPrime; 1103da1eb108d36da35333b2d655202791af854996bPrzemyslaw Szczepaniak static std::tr1::hash<T> hash_; 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NodeTable node_table_; 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(Collection); 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class I, class T> const I Collection<I, T>::kNoNodeId = -1; 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T> const size_t Collection<I, T>::kPrime = 7853; 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1213da1eb108d36da35333b2d655202791af854996bPrzemyslaw Szczepaniaktemplate <class I, class T> std::tr1::hash<T> Collection<I, T>::hash_; 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_EXTENSIONS_PDT_COLLECTION_H__ 126