1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2001-2006, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7#ifndef RBBINODE_H
8#define RBBINODE_H
9
10#include "unicode/utypes.h"
11#include "unicode/uobject.h"
12
13//
14//  class RBBINode
15//
16//                    Represents a node in the parse tree generated when reading
17//                    a rule file.
18//
19
20U_NAMESPACE_BEGIN
21
22class    UnicodeSet;
23class    UVector;
24
25class RBBINode : public UMemory {
26    public:
        // Kind of parse-tree node; stored in fType.  The trailing notes only
        //   restate which data members are documented below as type-specific.
27        enum NodeType {
28            setRef,
29            uset,               // fInputSet is used for nodes of this type only.
30            varRef,
31            leafChar,           // fVal holds the value for nodes of this type.
32            lookAhead,
33            tag,
34            endMark,            // fLookAheadEnd applies to nodes of this type.
35            opStart,
36            opCat,
37            opOr,
38            opStar,
39            opPlus,
40            opQuestion,
41            opBreak,
42            opReverse,
43            opLParen
44        };
45
        // Values stored in fPrecedence.
        //   NOTE(review): the enumerators presumably run from lowest to highest
        //   precedence - confirm against the code that compares them.
46        enum OpPrecedence {
47            precZero,
48            precStart,
49            precLParen,
50            precOpOr,
51            precOpCat
52        };
53
        // All data members below are public and are accessed directly.
        //   Their declaration order fixes the object layout; do not reorder.
54        NodeType      fType;                // Kind of node; see NodeType.
55        RBBINode      *fParent;             // Links to the neighboring nodes of the parse tree.
56        RBBINode      *fLeftChild;          //   NOTE(review): ownership of the pointed-to nodes is
57        RBBINode      *fRightChild;         //   decided by the implementation, not visible here.
58        UnicodeSet    *fInputSet;           // For uset nodes only.
59        OpPrecedence  fPrecedence;          // For binary ops only.
60
61        UnicodeString fText;                // Text corresponding to this node.
62                                            //   May be lazily evaluated when (if) needed
63                                            //   for some node types.
                                            //   NOTE(review): UnicodeString is held by value, so its
                                            //   full definition must be visible before this point.
                                            //   This header itself includes only unicode/utypes.h and
                                            //   unicode/uobject.h - confirm that every includer pulls
                                            //   in unicode/unistr.h first.
64        int           fFirstPos;            // Position in the rule source string of the
65                                            //   first text associated with the node.
66                                            //   If there's a left child, this will be the same
67                                            //   as that child's first position.
68        int           fLastPos;             //  Last position in the rule source string
69                                            //    of any text associated with this node.
70                                            //    If there's a right child, this will be the same
71                                            //    as that child's last position.
72
73        UBool         fNullable;            // See Aho.
                                            //   NOTE(review): presumably the "nullable" attribute of
                                            //   the regular-expression-to-DFA construction in the
                                            //   Aho/Sethi/Ullman compiler text - confirm.
74        int32_t       fVal;                 // For leafChar nodes, the value.
75                                            //   Values are the character category,
76                                            //   corresponds to columns in the final
77                                            //   state transition table.
78
79        UBool         fLookAheadEnd;        // For endMark nodes, set TRUE if
80                                            //   marking the end of a look-ahead rule.
81
        // Position sets.  These are UVector pointers and are distinct from the
        //   integer source offsets fFirstPos / fLastPos declared above.
        //   NOTE(review): presumably the firstpos / lastpos / followpos sets of the
        //   construction cited at fNullable - confirm against the table builder.
82        UVector       *fFirstPosSet;
83        UVector       *fLastPosSet;         // TODO: rename fFirstPos & fLastPos to avoid confusion.
84        UVector       *fFollowPos;
85
86
        // Construction, copy and destruction.  The definitions are not in this
        //   header, so how the copy constructor and destructor treat the child
        //   links and the UVector / UnicodeSet pointers cannot be stated here.
87        RBBINode(NodeType t);
88        RBBINode(const RBBINode &other);
89        ~RBBINode();
90
        // Tree operations, defined outside this header.  The descriptions are
        //   inferred from the names and signatures only - confirm before relying
        //   on them.
        //     cloneTree        - presumably returns a copy of the subtree rooted here.
        //     flattenVariables - presumably replaces variable references (varRef) in
        //                        the subtree; returns a node pointer.
        //     flattenSets      - presumably does the analogous work for set references.
        //     findNodes        - presumably collects nodes of type 'kind' into 'dest';
        //                        'status' is an in/out error code.
91        RBBINode    *cloneTree();
92        RBBINode    *flattenVariables();
93        void         flattenSets();
94        void         findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);
95
        // Debug printing; declared only when RBBI_DEBUG is defined.
96#ifdef RBBI_DEBUG
97        void        printNode();
98        void        printTree(UBool withHeading);
99#endif
100
101    private:
        // Assignment and equality comparison are declared but deliberately left
        //   without definitions, and kept private, so that any use is rejected.
102        RBBINode &operator = (const RBBINode &other); // No defs.
103        UBool operator == (const RBBINode &other);    // Private, so these functions won't accidentally be used.
104
        // This member exists only when RBBI_DEBUG is defined, so the size and
        //   layout of RBBINode differ between debug and non-debug builds.
105#ifdef RBBI_DEBUG
106        int           fSerialNum;           //  Debugging aids.
107#endif
108};
109
110#ifdef RBBI_DEBUG
// Debug-only helper; this declaration is visible only when RBBI_DEBUG is defined.
// The definition is not in this header.
//   s        - the string handed to the helper (passed by const reference).
//   minWidth - optional, defaults to 0.
//              NOTE(review): presumably the minimum output width, with shorter
//              strings padded - confirm against the definition.
111U_CFUNC void
112RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth=0);
113#endif
114
115U_NAMESPACE_END
116
117#endif
118
119