1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17// Notes on thread-safety: All of the classes here are thread-compatible. More 18// specifically, the registry machinery is thread-safe, as long as each thread 19// performs feature extraction on a different Sentence object. 20 21#ifndef LIBTEXTCLASSIFIER_COMMON_WORKSPACE_H_ 22#define LIBTEXTCLASSIFIER_COMMON_WORKSPACE_H_ 23 24#include <stddef.h> 25#include <string> 26#include <unordered_map> 27#include <utility> 28#include <vector> 29 30#include "util/base/logging.h" 31#include "util/base/macros.h" 32 33namespace libtextclassifier { 34namespace nlp_core { 35 36// A base class for shared workspaces. Derived classes implement a static member 37// function TypeName() which returns a human readable std::string name for the 38// class. 39class Workspace { 40 public: 41 // Polymorphic destructor. 42 virtual ~Workspace() {} 43 44 protected: 45 // Create an empty workspace. 46 Workspace() {} 47 48 private: 49 TC_DISALLOW_COPY_AND_ASSIGN(Workspace); 50}; 51 52// Returns a new, strictly increasing int every time it is invoked. 53int GetFreshTypeId(); 54 55// Struct to simulate typeid, but without RTTI. 56template <typename T> 57struct TypeId { 58 static int type_id; 59}; 60 61template <typename T> 62int TypeId<T>::type_id = GetFreshTypeId(); 63 64// A registry that keeps track of workspaces. 65class WorkspaceRegistry { 66 public: 67 // Create an empty registry. 68 WorkspaceRegistry() {} 69 70 // Returns the index of a named workspace, adding it to the registry first 71 // if necessary. 72 template <class W> 73 int Request(const std::string &name) { 74 const int id = TypeId<W>::type_id; 75 max_workspace_id_ = std::max(id, max_workspace_id_); 76 workspace_types_[id] = W::TypeName(); 77 std::vector<std::string> &names = workspace_names_[id]; 78 for (int i = 0; i < names.size(); ++i) { 79 if (names[i] == name) return i; 80 } 81 names.push_back(name); 82 return names.size() - 1; 83 } 84 85 // Returns the maximum workspace id that has been registered. 86 int MaxId() const { 87 return max_workspace_id_; 88 } 89 90 const std::unordered_map<int, std::vector<std::string> > &WorkspaceNames() 91 const { 92 return workspace_names_; 93 } 94 95 // Returns a std::string describing the registered workspaces. 96 std::string DebugString() const; 97 98 private: 99 // Workspace type names, indexed as workspace_types_[typeid]. 100 std::unordered_map<int, std::string> workspace_types_; 101 102 // Workspace names, indexed as workspace_names_[typeid][workspace]. 103 std::unordered_map<int, std::vector<std::string> > workspace_names_; 104 105 // The maximum workspace id that has been registered. 106 int max_workspace_id_ = 0; 107 108 TC_DISALLOW_COPY_AND_ASSIGN(WorkspaceRegistry); 109}; 110 111// A typed collected of workspaces. The workspaces are indexed according to an 112// external WorkspaceRegistry. If the WorkspaceSet is const, the contents are 113// also immutable. 114class WorkspaceSet { 115 public: 116 ~WorkspaceSet() { Reset(WorkspaceRegistry()); } 117 118 // Returns true if a workspace has been set. 119 template <class W> 120 bool Has(int index) const { 121 const int id = TypeId<W>::type_id; 122 TC_DCHECK_GE(id, 0); 123 TC_DCHECK_LT(id, workspaces_.size()); 124 TC_DCHECK_GE(index, 0); 125 TC_DCHECK_LT(index, workspaces_[id].size()); 126 if (id >= workspaces_.size()) return false; 127 return workspaces_[id][index] != nullptr; 128 } 129 130 // Returns an indexed workspace; the workspace must have been set. 131 template <class W> 132 const W &Get(int index) const { 133 TC_DCHECK(Has<W>(index)); 134 const int id = TypeId<W>::type_id; 135 const Workspace *w = workspaces_[id][index]; 136 return reinterpret_cast<const W &>(*w); 137 } 138 139 // Sets an indexed workspace; this takes ownership of the workspace, which 140 // must have been new-allocated. It is an error to set a workspace twice. 141 template <class W> 142 void Set(int index, W *workspace) { 143 const int id = TypeId<W>::type_id; 144 TC_DCHECK_GE(id, 0); 145 TC_DCHECK_LT(id, workspaces_.size()); 146 TC_DCHECK_GE(index, 0); 147 TC_DCHECK_LT(index, workspaces_[id].size()); 148 TC_DCHECK(workspaces_[id][index] == nullptr); 149 TC_DCHECK(workspace != nullptr); 150 workspaces_[id][index] = workspace; 151 } 152 153 void Reset(const WorkspaceRegistry ®istry) { 154 // Deallocate current workspaces. 155 for (auto &it : workspaces_) { 156 for (size_t index = 0; index < it.size(); ++index) { 157 delete it[index]; 158 } 159 } 160 workspaces_.clear(); 161 workspaces_.resize(registry.MaxId() + 1, std::vector<Workspace *>()); 162 for (auto &it : registry.WorkspaceNames()) { 163 workspaces_[it.first].resize(it.second.size()); 164 } 165 } 166 167 private: 168 // The set of workspaces, indexed as workspaces_[typeid][index]. 169 std::vector<std::vector<Workspace *> > workspaces_; 170}; 171 172// A workspace that wraps around a single int. 173class SingletonIntWorkspace : public Workspace { 174 public: 175 // Default-initializes the int value. 176 SingletonIntWorkspace() {} 177 178 // Initializes the int with the given value. 179 explicit SingletonIntWorkspace(int value) : value_(value) {} 180 181 // Returns the name of this type of workspace. 182 static std::string TypeName() { return "SingletonInt"; } 183 184 // Returns the int value. 185 int get() const { return value_; } 186 187 // Sets the int value. 188 void set(int value) { value_ = value; } 189 190 private: 191 // The enclosed int. 192 int value_ = 0; 193}; 194 195// A workspace that wraps around a vector of int. 196class VectorIntWorkspace : public Workspace { 197 public: 198 // Creates a vector of the given size. 199 explicit VectorIntWorkspace(int size); 200 201 // Creates a vector initialized with the given array. 202 explicit VectorIntWorkspace(const std::vector<int> &elements); 203 204 // Creates a vector of the given size, with each element initialized to the 205 // given value. 206 VectorIntWorkspace(int size, int value); 207 208 // Returns the name of this type of workspace. 209 static std::string TypeName(); 210 211 // Returns the i'th element. 212 int element(int i) const { return elements_[i]; } 213 214 // Sets the i'th element. 215 void set_element(int i, int value) { elements_[i] = value; } 216 217 private: 218 // The enclosed vector. 219 std::vector<int> elements_; 220}; 221 222// A workspace that wraps around a vector of vector of int. 223class VectorVectorIntWorkspace : public Workspace { 224 public: 225 // Creates a vector of empty vectors of the given size. 226 explicit VectorVectorIntWorkspace(int size); 227 228 // Returns the name of this type of workspace. 229 static std::string TypeName(); 230 231 // Returns the i'th vector of elements. 232 const std::vector<int> &elements(int i) const { return elements_[i]; } 233 234 // Mutable access to the i'th vector of elements. 235 std::vector<int> *mutable_elements(int i) { return &(elements_[i]); } 236 237 private: 238 // The enclosed vector of vector of elements. 239 std::vector<std::vector<int> > elements_; 240}; 241 242} // namespace nlp_core 243} // namespace libtextclassifier 244 245#endif // LIBTEXTCLASSIFIER_COMMON_WORKSPACE_H_ 246