1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// compose.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Compose a PDT and an FST.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_PDT_COMPOSE_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_PDT_COMPOSE_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <list>
255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <fst/extensions/pdt/pdt.h>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compose.h>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Return paren arcs for Find(kNoLabel).
325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst uint32 kParenList =  0x00000001;
335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Return a kNolabel loop for Find(paren).
355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst uint32 kParenLoop =  0x00000002;
365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// This class is a matcher that treats parens as multi-epsilon labels.
385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// It is most efficient if the parens are in a range non-overlapping with
395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// the non-paren labels.
405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class F>
415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinclass ParenMatcher {
425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin public:
435b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef SortedMatcher<F> M;
445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename M::FST FST;
455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename M::Arc Arc;
465b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename Arc::StateId StateId;
475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename Arc::Label Label;
485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename Arc::Weight Weight;
495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  ParenMatcher(const FST &fst, MatchType match_type,
515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin               uint32 flags = (kParenLoop | kParenList))
525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : matcher_(fst, match_type),
535b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        match_type_(match_type),
545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        flags_(flags) {
555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (match_type == MATCH_INPUT) {
565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      loop_.ilabel = kNoLabel;
575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      loop_.olabel = 0;
585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    } else {
595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      loop_.ilabel = 0;
605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      loop_.olabel = kNoLabel;
615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    loop_.weight = Weight::One();
635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    loop_.nextstate = kNoStateId;
645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  ParenMatcher(const ParenMatcher<F> &matcher, bool safe = false)
675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : matcher_(matcher.matcher_, safe),
685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        match_type_(matcher.match_type_),
695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        flags_(matcher.flags_),
705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        open_parens_(matcher.open_parens_),
715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        close_parens_(matcher.close_parens_),
725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        loop_(matcher.loop_) {
735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    loop_.nextstate = kNoStateId;
745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  ParenMatcher<F> *Copy(bool safe = false) const {
775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return new ParenMatcher<F>(*this, safe);
785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  MatchType Type(bool test) const { return matcher_.Type(test); }
815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void SetState(StateId s) {
835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    matcher_.SetState(s);
845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    loop_.nextstate = s;
855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool Find(Label match_label);
885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool Done() const {
905b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return done_;
915b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  const Arc& Value() const {
945b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return paren_loop_ ? loop_ : matcher_.Value();
955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void Next();
985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  const FST &GetFst() const { return matcher_.GetFst(); }
1005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  uint64 Properties(uint64 props) const { return matcher_.Properties(props); }
1025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  uint32 Flags() const { return matcher_.Flags(); }
1045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void AddOpenParen(Label label) {
1065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (label == 0) {
1075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      FSTERROR() << "ParenMatcher: Bad open paren label: 0";
1085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    } else {
1095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      open_parens_.Insert(label);
1105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
1115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
1125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void AddCloseParen(Label label) {
1145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (label == 0) {
1155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      FSTERROR() << "ParenMatcher: Bad close paren label: 0";
1165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    } else {
1175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      close_parens_.Insert(label);
1185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
1195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
1205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void RemoveOpenParen(Label label) {
1225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (label == 0) {
1235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      FSTERROR() << "ParenMatcher: Bad open paren label: 0";
1245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    } else {
1255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      open_parens_.Erase(label);
1265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
1275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
1285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void RemoveCloseParen(Label label) {
1305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (label == 0) {
1315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      FSTERROR() << "ParenMatcher: Bad close paren label: 0";
1325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    } else {
1335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      close_parens_.Erase(label);
1345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
1355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
1365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void ClearOpenParens() {
1385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    open_parens_.Clear();
1395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
1405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void ClearCloseParens() {
1425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    close_parens_.Clear();
1435b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
1445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool IsOpenParen(Label label) const {
1465b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return open_parens_.Member(label);
1475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
1485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool IsCloseParen(Label label) const {
1505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return close_parens_.Member(label);
1515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
1525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1535b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin private:
1545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Advances matcher to next open paren if it exists, returning true.
1555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // O.w. returns false.
1565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool NextOpenParen();
1575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Advances matcher to next open paren if it exists, returning true.
1595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // O.w. returns false.
1605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool NextCloseParen();
1615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  M matcher_;
1635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  MatchType match_type_;          // Type of match to perform
1645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  uint32 flags_;
1655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // open paren label set
1675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  CompactSet<Label, kNoLabel> open_parens_;
1685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // close paren label set
1705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  CompactSet<Label, kNoLabel> close_parens_;
1715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool open_paren_list_;         // Matching open paren list
1745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool close_paren_list_;        // Matching close paren list
1755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool paren_loop_;              // Current arc is the implicit paren loop
1765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  mutable Arc loop_;             // For non-consuming symbols
1775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool done_;                    // Matching done
1785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void operator=(const ParenMatcher<F> &);  // Disallow
1805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin};
1815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class M> inline
1835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinbool ParenMatcher<M>::Find(Label match_label) {
1845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  open_paren_list_ = false;
1855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  close_paren_list_ = false;
1865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  paren_loop_ = false;
1875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  done_ = false;
1885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Returns all parenthesis arcs
1905b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (match_label == kNoLabel && (flags_ & kParenList)) {
1915b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (open_parens_.LowerBound() != kNoLabel) {
1925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      matcher_.LowerBound(open_parens_.LowerBound());
1935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      open_paren_list_ = NextOpenParen();
1945b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (open_paren_list_) return true;
1955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
1965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (close_parens_.LowerBound() != kNoLabel) {
1975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      matcher_.LowerBound(close_parens_.LowerBound());
1985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      close_paren_list_ = NextCloseParen();
1995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (close_paren_list_) return true;
2005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
2015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
2025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Returns 'implicit' paren loop
2045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (match_label > 0 && (flags_ & kParenLoop) &&
2055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      (IsOpenParen(match_label) || IsCloseParen(match_label))) {
2065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    paren_loop_ = true;
2075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return true;
2085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
2095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Returns all other labels
2115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (matcher_.Find(match_label))
2125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return true;
2135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  done_ = true;
2155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  return false;
2165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}
2175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class F> inline
2195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinvoid ParenMatcher<F>::Next() {
2205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (paren_loop_) {
2215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    paren_loop_ = false;
2225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    done_ = true;
2235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  } else if (open_paren_list_) {
2245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    matcher_.Next();
2255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    open_paren_list_ = NextOpenParen();
2265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (open_paren_list_) return;
2275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (close_parens_.LowerBound() != kNoLabel) {
2295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      matcher_.LowerBound(close_parens_.LowerBound());
2305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      close_paren_list_ = NextCloseParen();
2315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (close_paren_list_) return;
2325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
2335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    done_ = !matcher_.Find(kNoLabel);
2345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  } else if (close_paren_list_) {
2355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    matcher_.Next();
2365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    close_paren_list_ = NextCloseParen();
2375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (close_paren_list_) return;
2385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    done_ = !matcher_.Find(kNoLabel);
2395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  } else {
2405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    matcher_.Next();
2415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    done_ = matcher_.Done();
2425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
2435b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}
2445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Advances matcher to next open paren if it exists, returning true.
2465b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// O.w. returns false.
2475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class F> inline
2485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinbool ParenMatcher<F>::NextOpenParen() {
2495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  for (; !matcher_.Done(); matcher_.Next()) {
2505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    Label label = match_type_ == MATCH_INPUT ?
2515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        matcher_.Value().ilabel : matcher_.Value().olabel;
2525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (label > open_parens_.UpperBound())
2535b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return false;
2545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (IsOpenParen(label))
2555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return true;
2565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
2575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  return false;
2585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}
2595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Advances matcher to next close paren if it exists, returning true.
2615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// O.w. returns false.
2625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class F> inline
2635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinbool ParenMatcher<F>::NextCloseParen() {
2645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  for (; !matcher_.Done(); matcher_.Next()) {
2655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    Label label = match_type_ == MATCH_INPUT ?
2665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        matcher_.Value().ilabel : matcher_.Value().olabel;
2675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (label > close_parens_.UpperBound())
2685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return false;
2695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (IsCloseParen(label))
2705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return true;
2715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
2725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  return false;
2735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}
2745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class F>
2775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinclass ParenFilter {
2785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin public:
2795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename F::FST1 FST1;
2805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename F::FST2 FST2;
2815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename F::Arc Arc;
2825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename Arc::StateId StateId;
2835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename Arc::Label Label;
2845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename Arc::Weight Weight;
2855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename F::Matcher1 Matcher1;
2865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename F::Matcher2 Matcher2;
2875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef typename F::FilterState FilterState1;
2885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef StateId StackId;
2895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef PdtStack<StackId, Label> ParenStack;
2905b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef IntegerFilterState<StackId> FilterState2;
2915b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef PairFilterState<FilterState1, FilterState2> FilterState;
2925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef ParenFilter<F> Filter;
2935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2945b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  ParenFilter(const FST1 &fst1, const FST2 &fst2,
2955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin              Matcher1 *matcher1 = 0,  Matcher2 *matcher2 = 0,
2965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin              const vector<pair<Label, Label> > *parens = 0,
2975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin              bool expand = false, bool keep_parens = true)
2985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : filter_(fst1, fst2, matcher1, matcher2),
2995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        parens_(parens ? *parens : vector<pair<Label, Label> >()),
3005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        expand_(expand),
3015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        keep_parens_(keep_parens),
3025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        f_(FilterState::NoState()),
3035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        stack_(parens_),
3045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        paren_id_(-1) {
3055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (parens) {
3065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      for (size_t i = 0; i < parens->size(); ++i) {
3075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        const pair<Label, Label>  &p = (*parens)[i];
3085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        parens_.push_back(p);
3095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        GetMatcher1()->AddOpenParen(p.first);
3105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        GetMatcher2()->AddOpenParen(p.first);
3115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        if (!expand_) {
3125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin          GetMatcher1()->AddCloseParen(p.second);
3135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin          GetMatcher2()->AddCloseParen(p.second);
3145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        }
3155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      }
3165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
3175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
3185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  ParenFilter(const Filter &filter, bool safe = false)
3205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : filter_(filter.filter_, safe),
3215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        parens_(filter.parens_),
3225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        expand_(filter.expand_),
3235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        keep_parens_(filter.keep_parens_),
3245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        f_(FilterState::NoState()),
3255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        stack_(filter.parens_),
3265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        paren_id_(-1) { }
3275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  FilterState Start() const {
3295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return FilterState(filter_.Start(), FilterState2(0));
3305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
3315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void SetState(StateId s1, StateId s2, const FilterState &f) {
3335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    f_ = f;
3345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    filter_.SetState(s1, s2, f_.GetState1());
3355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (!expand_)
3365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return;
3375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    ssize_t paren_id = stack_.Top(f.GetState2().GetState());
3395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (paren_id != paren_id_) {
3405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (paren_id_ != -1) {
3415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        GetMatcher1()->RemoveCloseParen(parens_[paren_id_].second);
3425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        GetMatcher2()->RemoveCloseParen(parens_[paren_id_].second);
3435b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      }
3445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      paren_id_ = paren_id;
3455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (paren_id_ != -1) {
3465b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        GetMatcher1()->AddCloseParen(parens_[paren_id_].second);
3475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        GetMatcher2()->AddCloseParen(parens_[paren_id_].second);
3485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      }
3495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
3505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
3515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  FilterState FilterArc(Arc *arc1, Arc *arc2) const {
3535b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    FilterState1 f1 = filter_.FilterArc(arc1, arc2);
3545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    const FilterState2 &f2 = f_.GetState2();
3555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (f1 == FilterState1::NoState())
3565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return FilterState::NoState();
3575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (arc1->olabel == kNoLabel && arc2->ilabel) {         // arc2 parentheses
3595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (keep_parens_) {
3605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        arc1->ilabel = arc2->ilabel;
3615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      } else if (arc2->ilabel) {
3625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        arc2->olabel = arc1->ilabel;
3635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      }
3645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return FilterParen(arc2->ilabel, f1, f2);
3655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    } else if (arc2->ilabel == kNoLabel && arc1->olabel) {  // arc1 parentheses
3665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (keep_parens_) {
3675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        arc2->olabel = arc1->olabel;
3685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      } else {
3695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        arc1->ilabel = arc2->olabel;
3705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      }
3715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return FilterParen(arc1->olabel, f1, f2);
3725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    } else {
3735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return FilterState(f1, f2);
3745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
3755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
3765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void FilterFinal(Weight *w1, Weight *w2) const {
3785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (f_.GetState2().GetState() != 0)
3795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      *w1 = Weight::Zero();
3805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    filter_.FilterFinal(w1, w2);
3815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
3825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Return resp matchers. Ownership stays with filter.
3845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); }
3855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); }
3865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  uint64 Properties(uint64 iprops) const {
3885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    uint64 oprops = filter_.Properties(iprops);
3895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return oprops & kILabelInvariantProperties & kOLabelInvariantProperties;
3905b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
3915b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin private:
3935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  const FilterState FilterParen(Label label, const FilterState1 &f1,
3945b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                                const FilterState2 &f2) const {
3955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (!expand_)
3965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return FilterState(f1, f2);
3975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    StackId stack_id = stack_.Find(f2.GetState(), label);
3995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (stack_id < 0) {
4005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return FilterState::NoState();
4015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    } else {
4025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      return FilterState(f1, FilterState2(stack_id));
4035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
4045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
4055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
4065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  F filter_;
4075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  vector<pair<Label, Label> > parens_;
4085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool expand_;                    // Expands to FST
4095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool keep_parens_;               // Retains parentheses in output
4105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  FilterState f_;                  // Current filter state
4115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  mutable ParenStack stack_;
4125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  ssize_t paren_id_;
4135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin};
4145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to setup composition options for PDT composition.
416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Default is for the PDT as the first composition argument.
417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc, bool left_pdt = true>
4185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinclass PdtComposeFstOptions : public
419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonComposeFstOptions<Arc,
4205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                  ParenMatcher< Fst<Arc> >,
4215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                  ParenFilter<AltSequenceComposeFilter<
4225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                                ParenMatcher< Fst<Arc> > > > > {
423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Label Label;
4255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef ParenMatcher< Fst<Arc> > PdtMatcher;
4265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef ParenFilter<AltSequenceComposeFilter<PdtMatcher> > PdtFilter;
427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef ComposeFstOptions<Arc, PdtMatcher, PdtFilter> COptions;
428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using COptions::matcher1;
429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using COptions::matcher2;
430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using COptions::filter;
431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  PdtComposeFstOptions(const Fst<Arc> &ifst1,
433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                    const vector<pair<Label, Label> > &parens,
4345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                       const Fst<Arc> &ifst2, bool expand = false,
4355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                       bool keep_parens = true) {
4365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    matcher1 = new PdtMatcher(ifst1, MATCH_OUTPUT, kParenList);
4375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    matcher2 = new PdtMatcher(ifst2, MATCH_INPUT, kParenLoop);
438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    filter = new PdtFilter(ifst1, ifst2, matcher1, matcher2, &parens,
4405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                           expand, keep_parens);
441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to setup composition options for PDT with FST composition.
445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization is for the FST as the first composition argument.
446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc>
4475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinclass PdtComposeFstOptions<Arc, false> : public
448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonComposeFstOptions<Arc,
4495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                  ParenMatcher< Fst<Arc> >,
4505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                  ParenFilter<SequenceComposeFilter<
4515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                                ParenMatcher< Fst<Arc> > > > > {
452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Label Label;
4545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef ParenMatcher< Fst<Arc> > PdtMatcher;
4555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef ParenFilter<SequenceComposeFilter<PdtMatcher> > PdtFilter;
456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef ComposeFstOptions<Arc, PdtMatcher, PdtFilter> COptions;
457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using COptions::matcher1;
458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using COptions::matcher2;
459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using COptions::filter;
460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  PdtComposeFstOptions(const Fst<Arc> &ifst1,
4625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                       const Fst<Arc> &ifst2,
4635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                       const vector<pair<Label, Label> > &parens,
4645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                       bool expand = false, bool keep_parens = true) {
4655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    matcher1 = new PdtMatcher(ifst1, MATCH_OUTPUT, kParenLoop);
4665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    matcher2 = new PdtMatcher(ifst2, MATCH_INPUT, kParenList);
467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    filter = new PdtFilter(ifst1, ifst2, matcher1, matcher2, &parens,
4695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                           expand, keep_parens);
470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinenum PdtComposeFilter {
4745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  PAREN_FILTER,          // Bar-Hillel construction; keeps parentheses
4755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  EXPAND_FILTER,         // Bar-Hillel + expansion; removes parentheses
4765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  EXPAND_PAREN_FILTER,   // Bar-Hillel + expansion; keeps parentheses
4775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin};
4785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
4795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinstruct PdtComposeOptions {
4805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool connect;  // Connect output
4815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  PdtComposeFilter filter_type;  // Which pre-defined filter to use
4825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
4835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  explicit PdtComposeOptions(bool c, PdtComposeFilter ft = PAREN_FILTER)
4845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : connect(c), filter_type(ft) {}
4855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  PdtComposeOptions() : connect(true), filter_type(PAREN_FILTER) {}
4865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin};
487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Composes pushdown transducer (PDT) encoded as an FST (1st arg) and
489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// an FST (2nd arg) with the result also a PDT encoded as an Fst. (3rd arg).
490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// In the PDTs, some transitions are labeled with open or close
491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// parentheses. To be interpreted as a PDT, the parens must balance on
492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// a path (see PdtExpand()). The open-close parenthesis label pairs
493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// are passed in 'parens'.
494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc>
495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Compose(const Fst<Arc> &ifst1,
496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const vector<pair<typename Arc::Label,
497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               typename Arc::Label> > &parens,
498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const Fst<Arc> &ifst2,
499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             MutableFst<Arc> *ofst,
5005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin             const PdtComposeOptions &opts = PdtComposeOptions()) {
5015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool expand = opts.filter_type != PAREN_FILTER;
5025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool keep_parens = opts.filter_type != EXPAND_FILTER;
5035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  PdtComposeFstOptions<Arc, true> copts(ifst1, parens, ifst2,
5045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                                        expand, keep_parens);
505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  copts.gc_limit = 0;
506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.connect)
508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Connect(ofst);
509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Composes an FST (1st arg) and pushdown transducer (PDT) encoded as
512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// an FST (2nd arg) with the result also a PDT encoded as an Fst (3rd arg).
513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// In the PDTs, some transitions are labeled with open or close
514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// parentheses. To be interpreted as a PDT, the parens must balance on
515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// a path (see ExpandFst()). The open-close parenthesis label pairs
516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// are passed in 'parens'.
517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc>
518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Compose(const Fst<Arc> &ifst1,
519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const Fst<Arc> &ifst2,
520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const vector<pair<typename Arc::Label,
521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               typename Arc::Label> > &parens,
522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             MutableFst<Arc> *ofst,
5235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin             const PdtComposeOptions &opts = PdtComposeOptions()) {
5245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool expand = opts.filter_type != PAREN_FILTER;
5255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool keep_parens = opts.filter_type != EXPAND_FILTER;
5265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  PdtComposeFstOptions<Arc, false> copts(ifst1, ifst2, parens,
5275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                                         expand, keep_parens);
528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  copts.gc_limit = 0;
529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.connect)
531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Connect(ofst);
532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_EXTENSIONS_PDT_COMPOSE_H__
537