union-find.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
120111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber
220111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// Licensed under the Apache License, Version 2.0 (the "License");
320111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// you may not use this file except in compliance with the License.
420111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// You may obtain a copy of the License at
520111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber//
620111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber//     http://www.apache.org/licenses/LICENSE-2.0
720111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber//
820111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// Unless required by applicable law or agreed to in writing, software
920111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// distributed under the License is distributed on an "AS IS" BASIS,
1020111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1120111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// See the License for the specific language governing permissions and
1220111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// limitations under the License.
1320111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber//
1420111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// Copyright 2005-2010 Google, Inc.
1520111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// Author: wojciech@google.com (Wojciech Skut)
1620111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber//
1720111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// \file Union-Find algorithm for dense sets of non-negative
1820111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// integers. Implemented using disjoint tree forests with rank
1920111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// heuristics and path compression.
2020111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber
212d7d46fb2d7f5f80afbf060f25ed049079fb0fc9Andreas Huber#ifndef __fst_union_find_inl_h__
2220111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber#define __fst_union_find_inl_h__
2320111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber
2420111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber#include <stack>
2520111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber#include <vector>
2620111aa043c5f404472bc63b90bc5aad906b1101Andreas Huberusing std::vector;
2720111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber#include <fst/types.h>
2820111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber
2920111aa043c5f404472bc63b90bc5aad906b1101Andreas Hubernamespace fst {
3020111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber
3120111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// Union-Find algorithm for dense sets of non-negative integers
3220111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber// (exact type: T).
3320111aa043c5f404472bc63b90bc5aad906b1101Andreas Hubertemplate <class T>
3420111aa043c5f404472bc63b90bc5aad906b1101Andreas Huberclass UnionFind {
3520111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber public:
3620111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber  // Ctor: creates a disjoint set forest for the range [0;max).
37c9a11abbb6b48604ea063daedd6118024cfbfa92Lajos Molnar  // 'fail' is a value indicating that an element hasn't been
38c9a11abbb6b48604ea063daedd6118024cfbfa92Lajos Molnar  // initialized using MakeSet(...). The upper bound of the range
39c9a11abbb6b48604ea063daedd6118024cfbfa92Lajos Molnar  // can be reset (increased) using MakeSet(...).
40c9a11abbb6b48604ea063daedd6118024cfbfa92Lajos Molnar  UnionFind(T max, T fail)
41c9a11abbb6b48604ea063daedd6118024cfbfa92Lajos Molnar      : parent_(max, fail), rank_(max), fail_(fail) { }
42c9a11abbb6b48604ea063daedd6118024cfbfa92Lajos Molnar
4320111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber  // Finds the representative of the set 'item' belongs to.
4420111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber  // Performs path compression if needed.
4520111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber  T FindSet(T item) {
4620111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber    if (item >= parent_.size()
4720111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber        || item == fail_
4820111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber        || parent_[item] == fail_) return fail_;
4920111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber
5020111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber    T *p = &parent_[item];
5120111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber    for (; *p != item; item = *p, p = &parent_[item]) {
5220111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber      exec_stack_.push(p);
5320111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber    }
5420111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber    for (; ! exec_stack_.empty(); exec_stack_.pop()) {
5520111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber      *exec_stack_.top() = *p;
5620111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber    }
5720111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber    return *p;
5820111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber  }
5920111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber
6020111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber  // Creates the (destructive) union of the sets x and y belong to.
6120111aa043c5f404472bc63b90bc5aad906b1101Andreas Huber  void Union(T x, T y) {
62    Link(FindSet(x), FindSet(y));
63  }
64
65  // Initialization of an element: creates a singleton set containing
66  // 'item'. The range [0;max) is reset if item >= max.
67  T MakeSet(T item) {
68    if (item >= parent_.size()) {
69      // New value in parent_ should be initialized to fail_
70      size_t nitem = item > 0 ? 2 * item : 2;
71      parent_.resize(nitem, fail_);
72      rank_.resize(nitem);
73    }
74    parent_[item] = item;
75    return item;
76  }
77
78  // Initialization of all elements starting from 0 to max - 1 to distinct sets
79  void MakeAllSet(T max) {
80    parent_.resize(max);
81    for (T item = 0; item < max; ++item) {
82      parent_[item] = item;
83    }
84  }
85
86 private:
87  vector<T> parent_;      // Parent nodes.
88  vector<int> rank_;      // Rank of an element = min. depth in tree.
89  T fail_;                // Value indicating lookup failure.
90  stack<T*> exec_stack_;  // Used for path compression.
91
92  // Links trees rooted in 'x' and 'y'.
93  void Link(T x, T y) {
94    if (x == y) return;
95
96    if (rank_[x] > rank_[y]) {
97      parent_[y] = x;
98    } else {
99      parent_[x] = y;
100      if (rank_[x] == rank_[y]) {
101        ++rank_[y];
102      }
103    }
104  }
105  DISALLOW_COPY_AND_ASSIGN(UnionFind);
106};
107
108}  // namespace fst
109
110#endif  // __fst_union_find_inl_h__
111