1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// compose.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Compose a PDT and an FST. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_PDT_COMPOSE_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_PDT_COMPOSE_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <list> 255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <fst/extensions/pdt/pdt.h> 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compose.h> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Return paren arcs for Find(kNoLabel). 325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst uint32 kParenList = 0x00000001; 335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Return a kNolabel loop for Find(paren). 355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst uint32 kParenLoop = 0x00000002; 365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// This class is a matcher that treats parens as multi-epsilon labels. 385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// It is most efficient if the parens are in a range non-overlapping with 395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// the non-paren labels. 405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class F> 415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinclass ParenMatcher { 425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin public: 435b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef SortedMatcher<F> M; 445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename M::FST FST; 455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename M::Arc Arc; 465b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename Arc::StateId StateId; 475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename Arc::Label Label; 485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename Arc::Weight Weight; 495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenMatcher(const FST &fst, MatchType match_type, 515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin uint32 flags = (kParenLoop | kParenList)) 525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : matcher_(fst, match_type), 535b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin match_type_(match_type), 545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin flags_(flags) { 555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (match_type == MATCH_INPUT) { 565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin loop_.ilabel = kNoLabel; 575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin loop_.olabel = 0; 585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin loop_.ilabel = 0; 605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin loop_.olabel = kNoLabel; 615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin loop_.weight = Weight::One(); 635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin loop_.nextstate = kNoStateId; 645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenMatcher(const ParenMatcher<F> &matcher, bool safe = false) 675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : matcher_(matcher.matcher_, safe), 685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin match_type_(matcher.match_type_), 695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin flags_(matcher.flags_), 705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin open_parens_(matcher.open_parens_), 715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin close_parens_(matcher.close_parens_), 725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin loop_(matcher.loop_) { 735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin loop_.nextstate = kNoStateId; 745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenMatcher<F> *Copy(bool safe = false) const { 775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return new ParenMatcher<F>(*this, safe); 785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin MatchType Type(bool test) const { return matcher_.Type(test); } 815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void SetState(StateId s) { 835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher_.SetState(s); 845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin loop_.nextstate = s; 855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool Find(Label match_label); 885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool Done() const { 905b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return done_; 915b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const Arc& Value() const { 945b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return paren_loop_ ? loop_ : matcher_.Value(); 955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void Next(); 985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const FST &GetFst() const { return matcher_.GetFst(); } 1005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin uint64 Properties(uint64 props) const { return matcher_.Properties(props); } 1025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin uint32 Flags() const { return matcher_.Flags(); } 1045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void AddOpenParen(Label label) { 1065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (label == 0) { 1075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin FSTERROR() << "ParenMatcher: Bad open paren label: 0"; 1085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 1095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin open_parens_.Insert(label); 1105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void AddCloseParen(Label label) { 1145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (label == 0) { 1155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin FSTERROR() << "ParenMatcher: Bad close paren label: 0"; 1165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 1175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin close_parens_.Insert(label); 1185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void RemoveOpenParen(Label label) { 1225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (label == 0) { 1235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin FSTERROR() << "ParenMatcher: Bad open paren label: 0"; 1245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 1255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin open_parens_.Erase(label); 1265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void RemoveCloseParen(Label label) { 1305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (label == 0) { 1315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin FSTERROR() << "ParenMatcher: Bad close paren label: 0"; 1325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 1335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin close_parens_.Erase(label); 1345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void ClearOpenParens() { 1385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin open_parens_.Clear(); 1395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void ClearCloseParens() { 1425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin close_parens_.Clear(); 1435b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool IsOpenParen(Label label) const { 1465b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return open_parens_.Member(label); 1475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool IsCloseParen(Label label) const { 1505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return close_parens_.Member(label); 1515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1535b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin private: 1545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Advances matcher to next open paren if it exists, returning true. 1555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // O.w. returns false. 1565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool NextOpenParen(); 1575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Advances matcher to next open paren if it exists, returning true. 1595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // O.w. returns false. 1605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool NextCloseParen(); 1615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin M matcher_; 1635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin MatchType match_type_; // Type of match to perform 1645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin uint32 flags_; 1655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // open paren label set 1675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin CompactSet<Label, kNoLabel> open_parens_; 1685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // close paren label set 1705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin CompactSet<Label, kNoLabel> close_parens_; 1715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool open_paren_list_; // Matching open paren list 1745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool close_paren_list_; // Matching close paren list 1755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool paren_loop_; // Current arc is the implicit paren loop 1765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin mutable Arc loop_; // For non-consuming symbols 1775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool done_; // Matching done 1785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void operator=(const ParenMatcher<F> &); // Disallow 1805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}; 1815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class M> inline 1835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinbool ParenMatcher<M>::Find(Label match_label) { 1845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin open_paren_list_ = false; 1855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin close_paren_list_ = false; 1865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin paren_loop_ = false; 1875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin done_ = false; 1885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Returns all parenthesis arcs 1905b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (match_label == kNoLabel && (flags_ & kParenList)) { 1915b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (open_parens_.LowerBound() != kNoLabel) { 1925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher_.LowerBound(open_parens_.LowerBound()); 1935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin open_paren_list_ = NextOpenParen(); 1945b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (open_paren_list_) return true; 1955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (close_parens_.LowerBound() != kNoLabel) { 1975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher_.LowerBound(close_parens_.LowerBound()); 1985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin close_paren_list_ = NextCloseParen(); 1995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (close_paren_list_) return true; 2005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Returns 'implicit' paren loop 2045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (match_label > 0 && (flags_ & kParenLoop) && 2055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin (IsOpenParen(match_label) || IsCloseParen(match_label))) { 2065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin paren_loop_ = true; 2075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return true; 2085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Returns all other labels 2115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (matcher_.Find(match_label)) 2125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return true; 2135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin done_ = true; 2155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return false; 2165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin} 2175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class F> inline 2195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinvoid ParenMatcher<F>::Next() { 2205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (paren_loop_) { 2215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin paren_loop_ = false; 2225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin done_ = true; 2235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else if (open_paren_list_) { 2245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher_.Next(); 2255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin open_paren_list_ = NextOpenParen(); 2265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (open_paren_list_) return; 2275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (close_parens_.LowerBound() != kNoLabel) { 2295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher_.LowerBound(close_parens_.LowerBound()); 2305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin close_paren_list_ = NextCloseParen(); 2315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (close_paren_list_) return; 2325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin done_ = !matcher_.Find(kNoLabel); 2345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else if (close_paren_list_) { 2355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher_.Next(); 2365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin close_paren_list_ = NextCloseParen(); 2375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (close_paren_list_) return; 2385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin done_ = !matcher_.Find(kNoLabel); 2395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 2405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher_.Next(); 2415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin done_ = matcher_.Done(); 2425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2435b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin} 2445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Advances matcher to next open paren if it exists, returning true. 2465b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// O.w. returns false. 2475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class F> inline 2485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinbool ParenMatcher<F>::NextOpenParen() { 2495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin for (; !matcher_.Done(); matcher_.Next()) { 2505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin Label label = match_type_ == MATCH_INPUT ? 2515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher_.Value().ilabel : matcher_.Value().olabel; 2525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (label > open_parens_.UpperBound()) 2535b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return false; 2545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (IsOpenParen(label)) 2555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return true; 2565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return false; 2585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin} 2595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Advances matcher to next close paren if it exists, returning true. 2615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// O.w. returns false. 2625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class F> inline 2635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinbool ParenMatcher<F>::NextCloseParen() { 2645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin for (; !matcher_.Done(); matcher_.Next()) { 2655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin Label label = match_type_ == MATCH_INPUT ? 2665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher_.Value().ilabel : matcher_.Value().olabel; 2675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (label > close_parens_.UpperBound()) 2685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return false; 2695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (IsCloseParen(label)) 2705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return true; 2715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return false; 2735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin} 2745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class F> 2775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinclass ParenFilter { 2785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin public: 2795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename F::FST1 FST1; 2805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename F::FST2 FST2; 2815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename F::Arc Arc; 2825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename Arc::StateId StateId; 2835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename Arc::Label Label; 2845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename Arc::Weight Weight; 2855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename F::Matcher1 Matcher1; 2865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename F::Matcher2 Matcher2; 2875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef typename F::FilterState FilterState1; 2885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef StateId StackId; 2895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef PdtStack<StackId, Label> ParenStack; 2905b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef IntegerFilterState<StackId> FilterState2; 2915b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef PairFilterState<FilterState1, FilterState2> FilterState; 2925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef ParenFilter<F> Filter; 2935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2945b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenFilter(const FST1 &fst1, const FST2 &fst2, 2955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin Matcher1 *matcher1 = 0, Matcher2 *matcher2 = 0, 2965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const vector<pair<Label, Label> > *parens = 0, 2975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool expand = false, bool keep_parens = true) 2985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : filter_(fst1, fst2, matcher1, matcher2), 2995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin parens_(parens ? *parens : vector<pair<Label, Label> >()), 3005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin expand_(expand), 3015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin keep_parens_(keep_parens), 3025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin f_(FilterState::NoState()), 3035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin stack_(parens_), 3045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin paren_id_(-1) { 3055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (parens) { 3065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin for (size_t i = 0; i < parens->size(); ++i) { 3075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const pair<Label, Label> &p = (*parens)[i]; 3085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin parens_.push_back(p); 3095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin GetMatcher1()->AddOpenParen(p.first); 3105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin GetMatcher2()->AddOpenParen(p.first); 3115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (!expand_) { 3125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin GetMatcher1()->AddCloseParen(p.second); 3135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin GetMatcher2()->AddCloseParen(p.second); 3145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenFilter(const Filter &filter, bool safe = false) 3205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : filter_(filter.filter_, safe), 3215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin parens_(filter.parens_), 3225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin expand_(filter.expand_), 3235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin keep_parens_(filter.keep_parens_), 3245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin f_(FilterState::NoState()), 3255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin stack_(filter.parens_), 3265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin paren_id_(-1) { } 3275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin FilterState Start() const { 3295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return FilterState(filter_.Start(), FilterState2(0)); 3305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void SetState(StateId s1, StateId s2, const FilterState &f) { 3335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin f_ = f; 3345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin filter_.SetState(s1, s2, f_.GetState1()); 3355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (!expand_) 3365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return; 3375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ssize_t paren_id = stack_.Top(f.GetState2().GetState()); 3395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (paren_id != paren_id_) { 3405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (paren_id_ != -1) { 3415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin GetMatcher1()->RemoveCloseParen(parens_[paren_id_].second); 3425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin GetMatcher2()->RemoveCloseParen(parens_[paren_id_].second); 3435b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin paren_id_ = paren_id; 3455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (paren_id_ != -1) { 3465b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin GetMatcher1()->AddCloseParen(parens_[paren_id_].second); 3475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin GetMatcher2()->AddCloseParen(parens_[paren_id_].second); 3485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin FilterState FilterArc(Arc *arc1, Arc *arc2) const { 3535b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin FilterState1 f1 = filter_.FilterArc(arc1, arc2); 3545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const FilterState2 &f2 = f_.GetState2(); 3555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (f1 == FilterState1::NoState()) 3565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return FilterState::NoState(); 3575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (arc1->olabel == kNoLabel && arc2->ilabel) { // arc2 parentheses 3595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (keep_parens_) { 3605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin arc1->ilabel = arc2->ilabel; 3615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else if (arc2->ilabel) { 3625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin arc2->olabel = arc1->ilabel; 3635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return FilterParen(arc2->ilabel, f1, f2); 3655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else if (arc2->ilabel == kNoLabel && arc1->olabel) { // arc1 parentheses 3665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (keep_parens_) { 3675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin arc2->olabel = arc1->olabel; 3685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 3695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin arc1->ilabel = arc2->olabel; 3705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return FilterParen(arc1->olabel, f1, f2); 3725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 3735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return FilterState(f1, f2); 3745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void FilterFinal(Weight *w1, Weight *w2) const { 3785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (f_.GetState2().GetState() != 0) 3795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin *w1 = Weight::Zero(); 3805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin filter_.FilterFinal(w1, w2); 3815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Return resp matchers. Ownership stays with filter. 3845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); } 3855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); } 3865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin uint64 Properties(uint64 iprops) const { 3885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin uint64 oprops = filter_.Properties(iprops); 3895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return oprops & kILabelInvariantProperties & kOLabelInvariantProperties; 3905b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3915b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin private: 3935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const FilterState FilterParen(Label label, const FilterState1 &f1, 3945b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const FilterState2 &f2) const { 3955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (!expand_) 3965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return FilterState(f1, f2); 3975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin StackId stack_id = stack_.Find(f2.GetState(), label); 3995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (stack_id < 0) { 4005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return FilterState::NoState(); 4015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 4025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return FilterState(f1, FilterState2(stack_id)); 4035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 4045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 4055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 4065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin F filter_; 4075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin vector<pair<Label, Label> > parens_; 4085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool expand_; // Expands to FST 4095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool keep_parens_; // Retains parentheses in output 4105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin FilterState f_; // Current filter state 4115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin mutable ParenStack stack_; 4125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ssize_t paren_id_; 4135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}; 4145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to setup composition options for PDT composition. 416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Default is for the PDT as the first composition argument. 417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc, bool left_pdt = true> 4185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinclass PdtComposeFstOptions : public 419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonComposeFstOptions<Arc, 4205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenMatcher< Fst<Arc> >, 4215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenFilter<AltSequenceComposeFilter< 4225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenMatcher< Fst<Arc> > > > > { 423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Label Label; 4255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef ParenMatcher< Fst<Arc> > PdtMatcher; 4265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef ParenFilter<AltSequenceComposeFilter<PdtMatcher> > PdtFilter; 427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef ComposeFstOptions<Arc, PdtMatcher, PdtFilter> COptions; 428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using COptions::matcher1; 429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using COptions::matcher2; 430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using COptions::filter; 431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 4325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin PdtComposeFstOptions(const Fst<Arc> &ifst1, 433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<Label, Label> > &parens, 4345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const Fst<Arc> &ifst2, bool expand = false, 4355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool keep_parens = true) { 4365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher1 = new PdtMatcher(ifst1, MATCH_OUTPUT, kParenList); 4375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher2 = new PdtMatcher(ifst2, MATCH_INPUT, kParenLoop); 438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 4395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin filter = new PdtFilter(ifst1, ifst2, matcher1, matcher2, &parens, 4405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin expand, keep_parens); 441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to setup composition options for PDT with FST composition. 445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization is for the FST as the first composition argument. 446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc> 4475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinclass PdtComposeFstOptions<Arc, false> : public 448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonComposeFstOptions<Arc, 4495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenMatcher< Fst<Arc> >, 4505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenFilter<SequenceComposeFilter< 4515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ParenMatcher< Fst<Arc> > > > > { 452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Label Label; 4545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef ParenMatcher< Fst<Arc> > PdtMatcher; 4555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef ParenFilter<SequenceComposeFilter<PdtMatcher> > PdtFilter; 456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef ComposeFstOptions<Arc, PdtMatcher, PdtFilter> COptions; 457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using COptions::matcher1; 458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using COptions::matcher2; 459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using COptions::filter; 460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 4615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin PdtComposeFstOptions(const Fst<Arc> &ifst1, 4625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const Fst<Arc> &ifst2, 4635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const vector<pair<Label, Label> > &parens, 4645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool expand = false, bool keep_parens = true) { 4655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher1 = new PdtMatcher(ifst1, MATCH_OUTPUT, kParenLoop); 4665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin matcher2 = new PdtMatcher(ifst2, MATCH_INPUT, kParenList); 467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 4685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin filter = new PdtFilter(ifst1, ifst2, matcher1, matcher2, &parens, 4695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin expand, keep_parens); 470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 4735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinenum PdtComposeFilter { 4745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin PAREN_FILTER, // Bar-Hillel construction; keeps parentheses 4755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin EXPAND_FILTER, // Bar-Hillel + expansion; removes parentheses 4765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin EXPAND_PAREN_FILTER, // Bar-Hillel + expansion; keeps parentheses 4775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}; 4785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 4795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinstruct PdtComposeOptions { 4805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool connect; // Connect output 4815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin PdtComposeFilter filter_type; // Which pre-defined filter to use 4825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 4835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin explicit PdtComposeOptions(bool c, PdtComposeFilter ft = PAREN_FILTER) 4845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : connect(c), filter_type(ft) {} 4855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin PdtComposeOptions() : connect(true), filter_type(PAREN_FILTER) {} 4865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}; 487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Composes pushdown transducer (PDT) encoded as an FST (1st arg) and 489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// an FST (2nd arg) with the result also a PDT encoded as an Fst. (3rd arg). 490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// In the PDTs, some transitions are labeled with open or close 491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// parentheses. To be interpreted as a PDT, the parens must balance on 492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// a path (see PdtExpand()). The open-close parenthesis label pairs 493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// are passed in 'parens'. 494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc> 495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Compose(const Fst<Arc> &ifst1, 496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<typename Arc::Label, 497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Label> > &parens, 498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<Arc> &ifst2, 499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, 5005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const PdtComposeOptions &opts = PdtComposeOptions()) { 5015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool expand = opts.filter_type != PAREN_FILTER; 5025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool keep_parens = opts.filter_type != EXPAND_FILTER; 5035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin PdtComposeFstOptions<Arc, true> copts(ifst1, parens, ifst2, 5045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin expand, keep_parens); 505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson copts.gc_limit = 0; 506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); 507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.connect) 508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Connect(ofst); 509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Composes an FST (1st arg) and pushdown transducer (PDT) encoded as 512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// an FST (2nd arg) with the result also a PDT encoded as an Fst (3rd arg). 513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// In the PDTs, some transitions are labeled with open or close 514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// parentheses. To be interpreted as a PDT, the parens must balance on 515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// a path (see ExpandFst()). The open-close parenthesis label pairs 516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// are passed in 'parens'. 517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc> 518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Compose(const Fst<Arc> &ifst1, 519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<Arc> &ifst2, 520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<typename Arc::Label, 521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Label> > &parens, 522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, 5235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const PdtComposeOptions &opts = PdtComposeOptions()) { 5245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool expand = opts.filter_type != PAREN_FILTER; 5255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool keep_parens = opts.filter_type != EXPAND_FILTER; 5265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin PdtComposeFstOptions<Arc, false> copts(ifst1, ifst2, parens, 5275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin expand, keep_parens); 528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson copts.gc_limit = 0; 529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); 530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.connect) 531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Connect(ofst); 532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_EXTENSIONS_PDT_COMPOSE_H__ 537