partition.h revision 73018b4a1d088cdda0e7bd059fddf1f308a8195a
1// partition.h
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15//
16// \file Functions and classes to create a partition of states
17//
18
19#ifndef FST_LIB_PARTITION_H__
20#define FST_LIB_PARTITION_H__
21
22#include <algorithm>
23#include <vector>
24
25#include "fst/lib/queue.h"
26
27
28namespace fst {
29
30template <typename T> class PartitionIterator;
31
32// \class Partition
33// \brief Defines a partitioning of states. Typically used to represent
34//        equivalence classes for Fst operations like minimization.
35//
36template <typename T>
37class Partition {
38  friend class PartitionIterator<T>;
39
40  struct Element {
41   Element() : value(0), next(0), prev(0) {}
42   Element(T v) : value(v), next(0), prev(0) {}
43
44   T        value;
45   Element* next;
46   Element* prev;
47  };
48
49 public:
50  Partition() {}
51
52  Partition(T num_states) {
53    Initialize(num_states);
54  }
55
56  ~Partition() {
57    for (size_t i = 0; i < elements_.size(); ++i)
58      delete elements_[i];
59  }
60
61  // Create an empty partition for num_states. At initialization time
62  // all elements are not assigned to a class (i.e class_index = -1).
63  // Initialize just creates num_states of elements. All element
64  // operations are then done by simply disconnecting the element from
65  // it current class and placing it at the head of the next class.
66  void Initialize(size_t num_states) {
67    for (size_t i = 0; i < elements_.size(); ++i)
68      delete elements_[i];
69    elements_.clear();
70    classes_.clear();
71    class_index_.clear();
72
73    elements_.resize(num_states);
74    class_index_.resize(num_states, -1);
75    class_size_.reserve(num_states);
76    for (size_t i = 0; i < num_states; ++i)
77      elements_[i] = new Element(i);
78    num_states_ = num_states;
79  }
80
81  // Add a class, resize classes_ and class_size_ resource by 1.
82  size_t AddClass() {
83    size_t num_classes = classes_.size();
84    classes_.resize(num_classes + 1, 0);
85    class_size_.resize(num_classes + 1, 0);
86    class_split_.resize(num_classes + 1, 0);
87    split_size_.resize(num_classes + 1, 0);
88    return num_classes;
89  }
90
91  void AllocateClasses(T num_classes) {
92    size_t n = classes_.size() + num_classes;
93    classes_.resize(n, 0);
94    class_size_.resize(n, 0);
95    class_split_.resize(n, 0);
96    split_size_.resize(n, 0);
97  }
98
99  // Add element_id to class_id. The Add method is used to initialize
100  // partition. Once elements have been added to a class, you need to
101  // use the Move() method move an element from once class to another.
102  void Add(T element_id, T class_id) {
103    Element* element = elements_[element_id];
104
105    if (classes_[class_id])
106      classes_[class_id]->prev = element;
107    element->next = classes_[class_id];
108    element->prev = 0;
109    classes_[class_id] = element;
110
111    class_index_[element_id] = class_id;
112    class_size_[class_id]++;
113  }
114
115  // Move and element_id to class_id. Disconnects (removes) element
116  // from it current class and
117  void Move(T element_id, T class_id) {
118    T old_class_id = class_index_[element_id];
119
120    Element* element = elements_[element_id];
121    if (element->next) element->next->prev = element->prev;
122    if (element->prev) element->prev->next = element->next;
123    else               classes_[old_class_id] = element->next;
124
125    Add(element_id, class_id);
126    class_size_[old_class_id]--;
127  }
128
129  // split class on the element_id
130  void SplitOn(T element_id) {
131    T class_id = class_index_[element_id];
132    if (class_size_[class_id] == 1) return;
133
134    // first time class is split
135    if (split_size_[class_id] == 0)
136      visited_classes_.push_back(class_id);
137
138    // increment size of split (set of element at head of chain)
139    split_size_[class_id]++;
140
141    // update split point
142    if (class_split_[class_id] == 0)
143      class_split_[class_id] = classes_[class_id];
144    if (class_split_[class_id] == elements_[element_id])
145      class_split_[class_id] = elements_[element_id]->next;
146
147    // move to head of chain in same class
148    Move(element_id, class_id);
149  }
150
151  // Finalize class_id, split if required, and update class_splits,
152  // class indices of the newly created class. Returns the new_class id
153  // or -1 if no new class was created.
154  T SplitRefine(T class_id) {
155    // only split if necessary
156    if (class_size_[class_id] == split_size_[class_id]) {
157      class_split_[class_id] = 0;
158      split_size_[class_id] = 0;
159      return -1;
160    } else {
161
162      T new_class = AddClass();
163      size_t remainder = class_size_[class_id] - split_size_[class_id];
164      if (remainder < (size_t)split_size_[class_id]) {  // add smaller
165        Element* split_el   = class_split_[class_id];
166        classes_[new_class] = split_el;
167        class_size_[class_id] = split_size_[class_id];
168        class_size_[new_class] = remainder;
169        split_el->prev->next = 0;
170        split_el->prev = 0;
171      } else {
172        Element* split_el   = class_split_[class_id];
173        classes_[new_class] = classes_[class_id];
174        class_size_[class_id] = remainder;
175        class_size_[new_class] = split_size_[class_id];
176        split_el->prev->next = 0;
177        split_el->prev = 0;
178        classes_[class_id] = split_el;
179      }
180
181      // update class index for element in new class
182      for (Element* el = classes_[new_class]; el; el = el->next)
183        class_index_[el->value] = new_class;
184
185      class_split_[class_id] = 0;
186      split_size_[class_id] = 0;
187
188      return new_class;
189    }
190  }
191
192  // Once all states have been processed for a particular class C, we
193  // can finalize the split. FinalizeSplit() will update each block in the
194  // partition, create new once and update the queue of active classes
195  // that require further refinement.
196  template <class Queue>
197  void FinalizeSplit(Queue* L) {
198    for (size_t i = 0; i < visited_classes_.size(); ++i) {
199      T new_class = SplitRefine(visited_classes_[i]);
200      if (new_class != -1 && L)
201        L->Enqueue(new_class);
202    }
203    visited_classes_.clear();
204  }
205
206
207  const T class_id(T element_id) const {
208    return class_index_[element_id];
209  }
210
211  const vector<T>& class_sizes() const {
212    return class_size_;
213  }
214
215  size_t class_size(T class_id)  const {
216    return class_size_[class_id];
217  }
218
219  const T num_classes() const {
220    return classes_.size();
221  }
222
223
224 private:
225  int num_states_;
226
227  // container of all elements (owner of ptrs)
228  vector<Element*> elements_;
229
230  // linked list of elements belonging to class
231  vector<Element*> classes_;
232
233  // pointer to split point for each class
234  vector<Element*> class_split_;
235
236  // class index of element
237  vector<T> class_index_;
238
239  // class sizes
240  vector<T> class_size_;
241
242  // size of split for each class
243  vector<T> split_size_;
244
245  // set of visited classes to be used in split refine
246  vector<T> visited_classes_;
247};
248
249
250// iterate over members of a class in a partition
251template <typename T>
252class PartitionIterator {
253  typedef typename Partition<T>::Element Element;
254 public:
255  PartitionIterator(const Partition<T>& partition, T class_id)
256      : p_(partition),
257        element_(p_.classes_[class_id]),
258        class_id_(class_id) {}
259
260  bool Done() {
261    return (element_ == 0);
262  }
263
264  const T Value() {
265    return (element_->value);
266  }
267
268  void Next() {
269    element_ = element_->next;
270  }
271
272  void Reset() {
273    element_ = p_.classes_[class_id_];
274  }
275
276 private:
277  const Partition<T>& p_;
278
279  const Element* element_;
280
281  T class_id_;
282};
283}
284
285#endif  // FST_LIB_PARTITION_H__
286