1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// partition.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: johans@google.com (Johan Schalkwyk)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file Functions and classes to create a partition of states
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_PARTITION_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_PARTITION_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/queue.h>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T> class PartitionIterator;
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class Partition
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \brief Defines a partitioning of states. Typically used to represent
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//        equivalence classes for Fst operations like minimization.
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass Partition {
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class PartitionIterator<T>;
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  struct Element {
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   Element() : value(0), next(0), prev(0) {}
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   Element(T v) : value(v), next(0), prev(0) {}
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   T        value;
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   Element* next;
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   Element* prev;
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Partition() {}
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Partition(T num_states) {
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Initialize(num_states);
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~Partition() {
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < elements_.size(); ++i)
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete elements_[i];
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Create an empty partition for num_states. At initialization time
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // all elements are not assigned to a class (i.e class_index = -1).
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Initialize just creates num_states of elements. All element
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // operations are then done by simply disconnecting the element from
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // it current class and placing it at the head of the next class.
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Initialize(size_t num_states) {
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < elements_.size(); ++i)
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete elements_[i];
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    elements_.clear();
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    classes_.clear();
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    class_index_.clear();
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    elements_.resize(num_states);
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    class_index_.resize(num_states, -1);
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    class_size_.reserve(num_states);
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < num_states; ++i)
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      elements_[i] = new Element(i);
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    num_states_ = num_states;
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Add a class, resize classes_ and class_size_ resource by 1.
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t AddClass() {
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t num_classes = classes_.size();
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    classes_.resize(num_classes + 1, 0);
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    class_size_.resize(num_classes + 1, 0);
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    class_split_.resize(num_classes + 1, 0);
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    split_size_.resize(num_classes + 1, 0);
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return num_classes;
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void AllocateClasses(T num_classes) {
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t n = classes_.size() + num_classes;
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    classes_.resize(n, 0);
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    class_size_.resize(n, 0);
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    class_split_.resize(n, 0);
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    split_size_.resize(n, 0);
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Add element_id to class_id. The Add method is used to initialize
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // partition. Once elements have been added to a class, you need to
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // use the Move() method move an element from once class to another.
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Add(T element_id, T class_id) {
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Element* element = elements_[element_id];
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (classes_[class_id])
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      classes_[class_id]->prev = element;
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    element->next = classes_[class_id];
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    element->prev = 0;
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    classes_[class_id] = element;
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    class_index_[element_id] = class_id;
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    class_size_[class_id]++;
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Move and element_id to class_id. Disconnects (removes) element
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // from it current class and
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Move(T element_id, T class_id) {
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    T old_class_id = class_index_[element_id];
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Element* element = elements_[element_id];
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (element->next) element->next->prev = element->prev;
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (element->prev) element->prev->next = element->next;
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else               classes_[old_class_id] = element->next;
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Add(element_id, class_id);
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    class_size_[old_class_id]--;
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // split class on the element_id
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SplitOn(T element_id) {
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    T class_id = class_index_[element_id];
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (class_size_[class_id] == 1) return;
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // first time class is split
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (split_size_[class_id] == 0)
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      visited_classes_.push_back(class_id);
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // increment size of split (set of element at head of chain)
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    split_size_[class_id]++;
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // update split point
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (class_split_[class_id] == 0)
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      class_split_[class_id] = classes_[class_id];
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (class_split_[class_id] == elements_[element_id])
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      class_split_[class_id] = elements_[element_id]->next;
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // move to head of chain in same class
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Move(element_id, class_id);
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Finalize class_id, split if required, and update class_splits,
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // class indices of the newly created class. Returns the new_class id
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // or -1 if no new class was created.
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  T SplitRefine(T class_id) {
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // only split if necessary
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (class_size_[class_id] == split_size_[class_id]) {
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      class_split_[class_id] = 0;
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      split_size_[class_id] = 0;
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return -1;
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      T new_class = AddClass();
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      size_t remainder = class_size_[class_id] - split_size_[class_id];
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (remainder < split_size_[class_id]) {  // add smaller
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Element* split_el   = class_split_[class_id];
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        classes_[new_class] = split_el;
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        class_size_[class_id] = split_size_[class_id];
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        class_size_[new_class] = remainder;
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        split_el->prev->next = 0;
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        split_el->prev = 0;
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Element* split_el   = class_split_[class_id];
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        classes_[new_class] = classes_[class_id];
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        class_size_[class_id] = remainder;
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        class_size_[new_class] = split_size_[class_id];
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        split_el->prev->next = 0;
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        split_el->prev = 0;
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        classes_[class_id] = split_el;
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // update class index for element in new class
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (Element* el = classes_[new_class]; el; el = el->next)
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        class_index_[el->value] = new_class;
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      class_split_[class_id] = 0;
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      split_size_[class_id] = 0;
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return new_class;
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Once all states have been processed for a particular class C, we
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // can finalize the split. FinalizeSplit() will update each block in the
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // partition, create new once and update the queue of active classes
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // that require further refinement.
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Queue>
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void FinalizeSplit(Queue* L) {
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < visited_classes_.size(); ++i) {
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      T new_class = SplitRefine(visited_classes_[i]);
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (new_class != -1 && L)
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        L->Enqueue(new_class);
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    visited_classes_.clear();
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T class_id(T element_id) const {
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return class_index_[element_id];
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<T>& class_sizes() const {
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return class_size_;
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const size_t class_size(T class_id)  const {
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return class_size_[class_id];
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T num_classes() const {
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return classes_.size();
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int num_states_;
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // container of all elements (owner of ptrs)
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Element*> elements_;
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // linked list of elements belonging to class
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Element*> classes_;
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // pointer to split point for each class
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Element*> class_split_;
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // class index of element
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<T> class_index_;
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // class sizes
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<T> class_size_;
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // size of split for each class
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<T> split_size_;
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // set of visited classes to be used in split refine
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<T> visited_classes_;
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// iterate over members of a class in a partition
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass PartitionIterator {
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Partition<T>::Element Element;
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  PartitionIterator(const Partition<T>& partition, T class_id)
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : p_(partition),
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        element_(p_.classes_[class_id]),
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        class_id_(class_id) {}
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Done() {
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return (element_ == 0);
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T Value() {
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return (element_->value);
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Next() {
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    element_ = element_->next;
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Reset() {
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    element_ = p_.classes_[class_id_];
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const Partition<T>& p_;
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const Element* element_;
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  T class_id_;
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_PARTITION_H__
291