1/*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "RegexParser.h"
9
10#include "LexUtil.h"
11
12RegexNode RegexParser::parse(std::string source) {
13    fSource = source;
14    fIndex = 0;
15    ASSERT(fStack.size() == 0);
16    this->regex();
17    ASSERT(fStack.size() == 1);
18    ASSERT(fIndex == source.size());
19    return this->pop();
20}
21
22char RegexParser::peek() {
23    if (fIndex >= fSource.size()) {
24        return END;
25    }
26    return fSource[fIndex];
27}
28
29void RegexParser::expect(char c) {
30    if (this->peek() != c) {
31        printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
32        exit(1);
33    }
34    ++fIndex;
35}
36
37RegexNode RegexParser::pop() {
38    RegexNode result = fStack.top();
39    fStack.pop();
40    return result;
41}
42
43void RegexParser::term() {
44    switch (this->peek()) {
45        case '(': this->group();  break;
46        case '[': this->set();    break;
47        case '.': this->dot();    break;
48        default: this->literal();
49    }
50}
51
52void RegexParser::quantifiedTerm() {
53    this->term();
54    switch (this->peek()) {
55        case '*': fStack.push(RegexNode(RegexNode::kStar_Kind,     this->pop())); ++fIndex; break;
56        case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind,     this->pop())); ++fIndex; break;
57        case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
58        default:  break;
59    }
60}
61
62void RegexParser::sequence() {
63    this->quantifiedTerm();
64    for (;;) {
65        switch (this->peek()) {
66            case END: // fall through
67            case '|': // fall through
68            case ')': return;
69            default:
70                this->sequence();
71                RegexNode right = this->pop();
72                RegexNode left = this->pop();
73                fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
74        }
75    }
76}
77
78RegexNode RegexParser::escapeSequence(char c) {
79    switch (c) {
80        case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
81        case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
82        case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
83        case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
84        default:  return RegexNode(RegexNode::kChar_Kind, c);
85    }
86}
87
88void RegexParser::literal() {
89    char c = this->peek();
90    if (c == '\\') {
91        ++fIndex;
92        fStack.push(this->escapeSequence(peek()));
93        ++fIndex;
94    }
95    else {
96        fStack.push(RegexNode(RegexNode::kChar_Kind, c));
97        ++fIndex;
98    }
99}
100
101void RegexParser::dot() {
102    this->expect('.');
103    fStack.push(RegexNode(RegexNode::kDot_Kind));
104}
105
106void RegexParser::group() {
107    this->expect('(');
108    this->regex();
109    this->expect(')');
110}
111
112void RegexParser::setItem() {
113    this->literal();
114    if (this->peek() == '-') {
115        ++fIndex;
116        if (peek() == ']') {
117            fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
118        }
119        else {
120            literal();
121            RegexNode end = this->pop();
122            ASSERT(end.fKind == RegexNode::kChar_Kind);
123            RegexNode start = this->pop();
124            ASSERT(start.fKind == RegexNode::kChar_Kind);
125            fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
126        }
127    }
128}
129
130void RegexParser::set() {
131    expect('[');
132    size_t depth = fStack.size();
133    RegexNode set(RegexNode::kCharset_Kind);
134    if (this->peek() == '^') {
135        ++fIndex;
136        set.fPayload.fBool = true;
137    }
138    else {
139        set.fPayload.fBool = false;
140    }
141    for (;;) {
142        switch (this->peek()) {
143            case ']':
144                ++fIndex;
145                while (fStack.size() > depth) {
146                    set.fChildren.push_back(this->pop());
147                }
148                fStack.push(std::move(set));
149                return;
150            case END:
151                printf("unterminated character set\n");
152                exit(1);
153            default:
154                this->setItem();
155        }
156    }
157}
158
159void RegexParser::regex() {
160    this->sequence();
161    switch (this->peek()) {
162        case '|': {
163            ++fIndex;
164            this->regex();
165            RegexNode right = this->pop();
166            RegexNode left = this->pop();
167            fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
168            break;
169        }
170        case END: // fall through
171        case ')':
172            return;
173        default:
174            ASSERT(false);
175    }
176}
177