1/* FILE:		sub_grph.h
2 *  DATE MODIFIED:	31-Aug-07
3 *  DESCRIPTION:	Part of the  SREC graph compiler project source files.
4 *
5 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
6 *                                                                           *
7 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
8 *  you may not use this file except in compliance with the License.         *
9 *                                                                           *
10 *  You may obtain a copy of the License at                                  *
11 *      http://www.apache.org/licenses/LICENSE-2.0                           *
12 *                                                                           *
13 *  Unless required by applicable law or agreed to in writing, software      *
14 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
15 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
16 *  See the License for the specific language governing permissions and      *
17 *  limitations under the License.                                           *
18 *                                                                           *
19 *---------------------------------------------------------------------------*/
20
21#ifndef __sub_grph_h__
22#define __sub_grph_h__
23
24#include "netw_arc.h"
25
26#include "vocab.h"
27
28//  TODO:
29//  Guards various stages 1.  Compiling  2.  Processing
30
31#define SCOPE_ROOT         0
32#define SCOPE_ITEM        -1
33#define SCOPE_RULE        -2
34#define SCOPE_ONEOF       -3
35#define SCOPE_COUNT       -4
36#define SCOPE_OPT         -5
37#define SCOPE_REPEAT      -6
38
39#define NONE_LABEL        -100000           //  Null transition
40#define TAG_LABEL         -100001           //  Tag transition
41#define WB_LABEL          -100002           //  Word boundary transition
42#define TERMINAL_LABEL    -100003           //  Terminal transition
43#define BEGINSCOPE_LABEL  -100004           //  Start of scope
44#define ENDSCOPE_LABEL    -100005           //  End of scope
45#define BEGINRULE_LABEL   -100006           //  Start of rule
46#define ENDRULE_LABEL     -100007           //  End of rule
47#define DISCARD_LABEL     -100010           //  Discard (internal)
48#define INITIAL_LABEL     -100011           //  Initial silence
49#define FINAL_LABEL       -100012           //  Final silence
50
51#define MAXNUM             100000           //  A large number
52#define BLKSIZE             10000           //  Buffer size
53#define MAXITS                  5           //  Maximum equivalence reduction iterations
54
55// #define MAX(X,Y)        ((X) > (Y) ? (X) : (Y))
56// #define MIN(X,Y)        ((X) > (Y) ? (X) : (Y))
57
58// General functions
59bool IsSlot (std::string label);
60
61
62class DetCache {
63    public:
64
65        DetCache () { num= 0; dataPack= 0; };
66
67        void AddEntry (int comb, int first, int second) {
68            if (num == 0)
69                dataPack= new int [3*BLKSIZE];
70            else if ((num%BLKSIZE) == 0) {
71                int *newPack = new int [num+3*BLKSIZE];
72                for (int ii= 0; ii < (3*num); ii++)
73                    newPack[ii]= dataPack[ii];
74                delete [] dataPack;
75                dataPack= newPack;
76            }
77            dataPack[3*num]= first;
78            dataPack[3*num+1]= second;
79            dataPack[3*num+2]= comb;
80	    num++;
81            return;
82        };
83
84        int QueryEntry (int first, int second) {
85            if (!dataPack)
86                return -1;
87            for (int ii= 0; ii < num; ii++)
88                if (dataPack[3*ii] == first && dataPack[3*ii+1] == second)
89                    return (dataPack[3*ii+2]);
90            return -1;
91        }
92
93        ~DetCache () { if (dataPack) delete [] dataPack; };
94
95    private:
96        int num;
97        int *dataPack;
98};
99
100
101class SubGraph
102{
103public:
104
105    SubGraph (char *name, int ruleNo)
106    {
107        int count= strlen(name);
108        title= new char [count+1];
109        strcpy (title, name);
110	// printf ("New Rule %s\n", title);
111        ruleId= ruleNo;
112        popOp= 0;
113        numArc= 0;
114        numVertex= 0;
115        lastScope= SCOPE_ROOT;
116        startId= NewVertexId();
117        lastId= startId;
118        endId= lastId;
119        forwardList= 0;
120        backwardList= 0;
121        sortNum= 0;
122        vertexProp= 0;
123        sizeProp= 0;
124        return;
125    };
126
127    SubGraph (SubGraph *baseG);
128
129    ~SubGraph ()
130    {
131        delete [] title;
132        for (int ii= 0; ii < numArc; ii++)
133            delete arc[ii];
134        if (numArc >0)
135            delete [] arc;
136        if (forwardList)
137            delete [] forwardList;
138        if (backwardList)
139            delete [] backwardList;
140        return;
141    }
142
143    void getName (char *name, int maxlen)
144    {
145        strncpy (name, title, maxlen);
146    }
147
148    int getRuleId () {
149        return ruleId;
150    }
151
152    /**  Creates a copy of the graph */
153    void CopyFastArcs (SubGraph *subG, int startLoc, int endLoc, int offset, int headId, int newHeadId, int tailId, int newTailId);
154
155    /**  Graph construction routines */
156    void BeginScope (int scopeType, int arg1, int arg2);
157    void EndScope ();
158    int  AddItem (int inputLabel, int tagLabel);
159    int  AddTag (int tagLabel);
160    void ExpandRules (SubGraph **subList, int *lookupList, int numSubs);
161
162    /**  Reverse subgraph */
163    void ReverseGraphForOutput ();
164    void SortLanguage ();
165    void SortLanguageForMin ();
166    void SortLanguageReverse ();
167    void UpdateSortForLanguage ();
168
169    void RemoveRuleConnections ();
170    void RemoveInternalConnections ();
171    void RemoveUnreachedConnections (int startPoint, int endPoint);
172    void RemoveUnreachedConnectionsDebug (int startPoint, int endPoint);
173    void RemoveTagConnections (int startPoint, int endPoint);
174    void AddTerminalConnections ();
175
176    void Print ();
177    void PrintText ();
178    void PrintWithLabels( GRXMLDoc &p_Doc );
179    void RemapForSortedOutput ( GRXMLDoc & doc );
180
181    void WriteForwardGraphWithSemantic ( std::string & fileName, GRXMLDoc &p_Doc );
182    void WriteForwardGraphFile ( std::string & fileName, GRXMLDoc &p_Doc );
183    void WriteFile ( std::string & fileName, GRXMLDoc &p_Doc );
184    void WritePhonemeGraphFile( std::string & fileName, GRXMLDoc &p_Doc );
185    void WriteHMMGraphFile( std::string & fileName, GRXMLDoc &p_Doc );
186
187    void DeterminizeArcs ();
188    int IsDeterminized (int currId);
189    void ReduceArcsByEquivalence ();
190
191    void ExpandPhonemes ( GRXMLDoc & doc );
192    void AddLeftContexts ();
193    void AddRightContexts ();
194    void ExpandToHMMs ( GRXMLDoc & doc );
195    void ExpandIntraWordSilence ( GRXMLDoc & doc );
196    void ClearOutputs ();
197    void ShiftOutputsToLeft ();
198    void FinalProcessing ( GRXMLDoc & doc );
199
200    void DebugPrintDirective (char *dirrData);
201    void DebugPrintLabel (int labId);
202
203private:
204
205    int        NewVertexId ();
206    int        numVertex;
207
208    void       AllocateSpaceForArc ();
209    NUANArc        *CreateArc (int iLabel, int oLabel, int from, int to);
210    NUANArc        *InheritArc (NUANArc *arcBase, int id);
211    NUANArc        *InheritReverseArc (NUANArc *arcBase, int id);
212    NUANArc        *CreateCopyWithOutput (NUANArc *arcBase, int id);
213    NUANArc        *InheritReverseArcWithTag (NUANArc *arcBase, int id, int tagId);
214    int        numArc;
215    NUANArc        **arc;
216    int        sortNum;
217    int        sortRevNum;
218    int        *forwardList;
219    int        *backwardList;
220
221    int  ConnectLastScope (int beginScopeId, int endScopeId);
222    int  CloseScope ();
223    void UpdateVertexCount (int startLoc);
224
225    int FindFromIndex (int element);
226    int FindToIndex (int element);
227    void PullUpBegins (int currId, int baseId, int endId, int procLabel, int *nodeList, int currNum, int maxNum);
228    void ProcessBegins (int currId, int endId, int procLabel, int *nodeList, int currNum, int *visitMark, int maxNum);
229    void PullUpEnds (int currId, int baseId, int initialId, int procLabel, int *nodeList, int currNum, int maxNum);
230    void ProcessEnds (int currId, int initialId, int procLabel, int *nodeList, int currNum, int *visitMark, int maxNum);
231    bool PullUpTags (int currId, int baseId, int initialId, int outTag, int *nodeList, int currNum, int maxNum);
232    void ProcessTags (int currId, int initialId, int *nodeList, int currNum, int *visitMark, int maxNum);
233    void ClearDuplicateArcs ();
234
235    void RemoveRuleStarts (int startId, int endId);
236    void RemoveRuleEnds (int startId, int endId);
237    void RemoveNulls (int startPoint, int endPoint);
238    void RemoveForwardConnections (int startId, int endId);
239    void RemoveBackwardConnections (int startId, int endId);
240    void ClearLabelledConnections (int labItem);
241    void RemoveUnvisitedArcs (int initialId, int finalId);
242    void RemoveMarkedNodes (int *forwardDepth, int *reverseDepth);
243    void RemoveDiscardedArcs ();
244    void RenumberNodes ();
245
246    bool EquivalenceTestForward (int firstId, int secondId, int *equivMap);
247    void ForwardDepthData (int startId, int *depthMap, int depth);
248    void ReverseDepthData (int startId, int *depthMap, int depth);
249    void ReverseDepthOnTerminal (int *depthMap);
250
251    void MapGraphVertices (int *equivMap);
252    void IdentifyEquivalence (int *depthMap, int *equivMap);
253    void CheckForChangeAndResort (int currId, int *mapList);
254
255    void SortLanguageAtIndices (int begIx, int endIx);
256    void SortLanguageAtIndicesForMin (int begIx, int endIx);
257    void SortLanguageAtSortIndices (int begIx, int endIx);
258    bool CheckSort ();
259    bool CheckSortReverse ();
260    void RemoveDuplicatesAtNode (int rixBegin, int rixEnd);
261    void MergeVertices (int newId, int firstId, int secondId);
262    void DeterminizeAtVertex (int baseId, DetCache *cache);
263    int  PairwiseDeterminize (int baseId, int firstId, int fixStart, int secondId, int sixStart, DetCache *cache);
264    void ClearRuleIds ();
265    void ViewNode (int currId);
266
267    void ReverseMarkArcs ();
268    void ReverseMarkOutput (int currId, int initialId, int outId);
269    void MarkNodesByOutputAndClearArcs ();
270    void ExpandWordBoundaries ( GRXMLDoc & doc );
271    void AddInitialFinalSilences ( GRXMLDoc & doc );
272
273    void UpdateVisitationCache (int startNo);
274    void ClearVisitationCache ();
275    void SetupVisitationCache ();
276    void DecVisitationCache (int currId);
277    void IncVisitationCache (int currId);
278    int VisitationConsistencyCheck ();
279
280    void CreateNodeProperty () {
281        vertexProp= new int [BLKSIZE];
282        vertexMinned= new bool [BLKSIZE];
283        sizeProp= BLKSIZE;
284        for (int ii= 0; ii < sizeProp; ii++) {
285            vertexProp[ii]= 0;
286            vertexMinned[ii]= false;
287	}
288        return;
289    }
290
291    void IncNodeProperty (int vertNo) {
292        if (vertNo >= sizeProp) {
293            int newSize= (vertNo/BLKSIZE + 1)*BLKSIZE;
294            int *newPack = new int [newSize];
295            bool *newMinned = new bool [newSize];
296            int ii;
297            for (ii= 0; ii < sizeProp; ii++) {
298                newPack[ii]= vertexProp[ii];
299                newMinned[ii]= vertexMinned[ii];
300	    }
301            for (ii= sizeProp; ii < newSize; ii++) {
302                newPack[ii]= 0;
303                newMinned[ii]= false;
304	    }
305            delete [] vertexProp;
306            delete [] vertexMinned;
307            vertexProp= newPack;
308            vertexMinned= newMinned;
309	    sizeProp= newSize;
310        }
311        vertexProp[vertNo]++;
312        return;
313    }
314
315    void DecNodeProperty (int vertNo) {
316        if (vertNo < sizeProp)
317            vertexProp[vertNo]--;
318        return;
319    }
320
321    void SetNodeProperty (int vertNo, int value) {
322        if (vertNo < sizeProp)
323            vertexProp[vertNo]= value;
324        return;
325    }
326
327    void SetMinProperty (int vertNo) {
328        if (vertNo < sizeProp)
329            vertexMinned[vertNo]= true;
330        return;
331    }
332
333    int QueryNodeProperty (int vertNo) {
334        if (vertNo < sizeProp)
335            return (vertexProp[vertNo]);
336        return -1;
337    }
338
339    int QueryMinProperty (int vertNo) {
340        if (vertNo < sizeProp)
341            return (vertexMinned[vertNo]);
342        return false;
343    }
344
345    void DeleteNodeProperty () {
346        delete [] vertexProp;
347        delete [] vertexMinned;
348        vertexProp= 0;
349        vertexMinned= 0;
350    }
351
352
353    /*  Stacking of operations and related data
354    */
355    char    *title;
356    int     ruleId;
357    int     lastScope;
358    int     startId;
359    int     endId;
360    int     prevStartId;
361    int     prevEndId;
362    int     lastId;
363    int     arg1;
364    int     arg2;
365    int     arcLoc;
366    int     opStack[100000];      // TODO: remove hard coded number
367    int     popOp;
368    void    pushScope ();
369    void    popScope ();
370    int     silenceId;
371    int     intraId;
372    int     *vertexProp;          //  Vertex properties
373    bool    *vertexMinned;          //  Vertex properties
374    int     sizeProp;
375};
376
377#endif // __sub_grph_h__
378