partition.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1// partition.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: johans@google.com (Johan Schalkwyk)
17//
18// \file Functions and classes to create a partition of states
19//
20
21#ifndef FST_LIB_PARTITION_H__
22#define FST_LIB_PARTITION_H__
23
24#include <vector>
25using std::vector;
26#include <algorithm>
27
28
29#include <fst/queue.h>
30
31
32
33namespace fst {
34
35template <typename T> class PartitionIterator;
36
37// \class Partition
38// \brief Defines a partitioning of states. Typically used to represent
39//        equivalence classes for Fst operations like minimization.
40//
41template <typename T>
42class Partition {
43  friend class PartitionIterator<T>;
44
45  struct Element {
46   Element() : value(0), next(0), prev(0) {}
47   Element(T v) : value(v), next(0), prev(0) {}
48
49   T        value;
50   Element* next;
51   Element* prev;
52  };
53
54 public:
55  Partition() {}
56
57  Partition(T num_states) {
58    Initialize(num_states);
59  }
60
61  ~Partition() {
62    for (size_t i = 0; i < elements_.size(); ++i)
63      delete elements_[i];
64  }
65
66  // Create an empty partition for num_states. At initialization time
67  // all elements are not assigned to a class (i.e class_index = -1).
68  // Initialize just creates num_states of elements. All element
69  // operations are then done by simply disconnecting the element from
70  // it current class and placing it at the head of the next class.
71  void Initialize(size_t num_states) {
72    for (size_t i = 0; i < elements_.size(); ++i)
73      delete elements_[i];
74    elements_.clear();
75    classes_.clear();
76    class_index_.clear();
77
78    elements_.resize(num_states);
79    class_index_.resize(num_states, -1);
80    class_size_.reserve(num_states);
81    for (size_t i = 0; i < num_states; ++i)
82      elements_[i] = new Element(i);
83    num_states_ = num_states;
84  }
85
86  // Add a class, resize classes_ and class_size_ resource by 1.
87  size_t AddClass() {
88    size_t num_classes = classes_.size();
89    classes_.resize(num_classes + 1, 0);
90    class_size_.resize(num_classes + 1, 0);
91    class_split_.resize(num_classes + 1, 0);
92    split_size_.resize(num_classes + 1, 0);
93    return num_classes;
94  }
95
96  void AllocateClasses(T num_classes) {
97    size_t n = classes_.size() + num_classes;
98    classes_.resize(n, 0);
99    class_size_.resize(n, 0);
100    class_split_.resize(n, 0);
101    split_size_.resize(n, 0);
102  }
103
104  // Add element_id to class_id. The Add method is used to initialize
105  // partition. Once elements have been added to a class, you need to
106  // use the Move() method move an element from once class to another.
107  void Add(T element_id, T class_id) {
108    Element* element = elements_[element_id];
109
110    if (classes_[class_id])
111      classes_[class_id]->prev = element;
112    element->next = classes_[class_id];
113    element->prev = 0;
114    classes_[class_id] = element;
115
116    class_index_[element_id] = class_id;
117    class_size_[class_id]++;
118  }
119
120  // Move and element_id to class_id. Disconnects (removes) element
121  // from it current class and
122  void Move(T element_id, T class_id) {
123    T old_class_id = class_index_[element_id];
124
125    Element* element = elements_[element_id];
126    if (element->next) element->next->prev = element->prev;
127    if (element->prev) element->prev->next = element->next;
128    else               classes_[old_class_id] = element->next;
129
130    Add(element_id, class_id);
131    class_size_[old_class_id]--;
132  }
133
134  // split class on the element_id
135  void SplitOn(T element_id) {
136    T class_id = class_index_[element_id];
137    if (class_size_[class_id] == 1) return;
138
139    // first time class is split
140    if (split_size_[class_id] == 0)
141      visited_classes_.push_back(class_id);
142
143    // increment size of split (set of element at head of chain)
144    split_size_[class_id]++;
145
146    // update split point
147    if (class_split_[class_id] == 0)
148      class_split_[class_id] = classes_[class_id];
149    if (class_split_[class_id] == elements_[element_id])
150      class_split_[class_id] = elements_[element_id]->next;
151
152    // move to head of chain in same class
153    Move(element_id, class_id);
154  }
155
156  // Finalize class_id, split if required, and update class_splits,
157  // class indices of the newly created class. Returns the new_class id
158  // or -1 if no new class was created.
159  T SplitRefine(T class_id) {
160    // only split if necessary
161    if (class_size_[class_id] == split_size_[class_id]) {
162      class_split_[class_id] = 0;
163      split_size_[class_id] = 0;
164      return -1;
165    } else {
166
167      T new_class = AddClass();
168      size_t remainder = class_size_[class_id] - split_size_[class_id];
169      if (remainder < split_size_[class_id]) {  // add smaller
170        Element* split_el   = class_split_[class_id];
171        classes_[new_class] = split_el;
172        class_size_[class_id] = split_size_[class_id];
173        class_size_[new_class] = remainder;
174        split_el->prev->next = 0;
175        split_el->prev = 0;
176      } else {
177        Element* split_el   = class_split_[class_id];
178        classes_[new_class] = classes_[class_id];
179        class_size_[class_id] = remainder;
180        class_size_[new_class] = split_size_[class_id];
181        split_el->prev->next = 0;
182        split_el->prev = 0;
183        classes_[class_id] = split_el;
184      }
185
186      // update class index for element in new class
187      for (Element* el = classes_[new_class]; el; el = el->next)
188        class_index_[el->value] = new_class;
189
190      class_split_[class_id] = 0;
191      split_size_[class_id] = 0;
192
193      return new_class;
194    }
195  }
196
197  // Once all states have been processed for a particular class C, we
198  // can finalize the split. FinalizeSplit() will update each block in the
199  // partition, create new once and update the queue of active classes
200  // that require further refinement.
201  template <class Queue>
202  void FinalizeSplit(Queue* L) {
203    for (size_t i = 0; i < visited_classes_.size(); ++i) {
204      T new_class = SplitRefine(visited_classes_[i]);
205      if (new_class != -1 && L)
206        L->Enqueue(new_class);
207    }
208    visited_classes_.clear();
209  }
210
211
212  const T class_id(T element_id) const {
213    return class_index_[element_id];
214  }
215
216  const vector<T>& class_sizes() const {
217    return class_size_;
218  }
219
220  const size_t class_size(T class_id)  const {
221    return class_size_[class_id];
222  }
223
224  const T num_classes() const {
225    return classes_.size();
226  }
227
228
229 private:
230  int num_states_;
231
232  // container of all elements (owner of ptrs)
233  vector<Element*> elements_;
234
235  // linked list of elements belonging to class
236  vector<Element*> classes_;
237
238  // pointer to split point for each class
239  vector<Element*> class_split_;
240
241  // class index of element
242  vector<T> class_index_;
243
244  // class sizes
245  vector<T> class_size_;
246
247  // size of split for each class
248  vector<T> split_size_;
249
250  // set of visited classes to be used in split refine
251  vector<T> visited_classes_;
252};
253
254
255// iterate over members of a class in a partition
256template <typename T>
257class PartitionIterator {
258  typedef typename Partition<T>::Element Element;
259 public:
260  PartitionIterator(const Partition<T>& partition, T class_id)
261      : p_(partition),
262        element_(p_.classes_[class_id]),
263        class_id_(class_id) {}
264
265  bool Done() {
266    return (element_ == 0);
267  }
268
269  const T Value() {
270    return (element_->value);
271  }
272
273  void Next() {
274    element_ = element_->next;
275  }
276
277  void Reset() {
278    element_ = p_.classes_[class_id_];
279  }
280
281 private:
282  const Partition<T>& p_;
283
284  const Element* element_;
285
286  T class_id_;
287};
288}  // namespace fst
289
290#endif  // FST_LIB_PARTITION_H__
291