1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2014 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 *  @file bits/regex_automaton.h
27 *  This is an internal header file, included by other library headers.
28 *  Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33namespace __detail
34{
35_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37  /**
38   *  @defgroup regex-detail Base and Implementation Classes
39   *  @ingroup regex
40   *  @{
41   */
42
43  typedef long _StateIdT;
44  static const _StateIdT _S_invalid_state_id  = -1;
45
46  template<typename _CharT>
47    using _Matcher = std::function<bool (_CharT)>;
48
49  /// Operation codes that define the type of transitions within the base NFA
50  /// that represents the regular expression.
51  enum _Opcode : int
52  {
53      _S_opcode_unknown,
54      _S_opcode_alternative,
55      _S_opcode_backref,
56      _S_opcode_line_begin_assertion,
57      _S_opcode_line_end_assertion,
58      _S_opcode_word_boundary,
59      _S_opcode_subexpr_lookahead,
60      _S_opcode_subexpr_begin,
61      _S_opcode_subexpr_end,
62      _S_opcode_dummy,
63      _S_opcode_match,
64      _S_opcode_accept,
65  };
66
67  struct _State_base
68  {
69    _Opcode      _M_opcode;           // type of outgoing transition
70    _StateIdT    _M_next;             // outgoing transition
71    union // Since they are mutually exclusive.
72    {
73      size_t _M_subexpr;        // for _S_opcode_subexpr_*
74      size_t _M_backref_index;  // for _S_opcode_backref
75      struct
76      {
77	// for _S_opcode_alternative.
78	_StateIdT  _M_quant_index;
79	// for _S_opcode_alternative or _S_opcode_subexpr_lookahead
80	_StateIdT  _M_alt;
81	// for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
82	// quantifiers (ungreedy if set true)
83	bool       _M_neg;
84      };
85    };
86
87    explicit _State_base(_Opcode __opcode)
88    : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
89    { }
90
91  protected:
92    ~_State_base() = default;
93
94  public:
95#ifdef _GLIBCXX_DEBUG
96    std::ostream&
97    _M_print(std::ostream& ostr) const;
98
99    // Prints graphviz dot commands for state.
100    std::ostream&
101    _M_dot(std::ostream& __ostr, _StateIdT __id) const;
102#endif
103  };
104
105  template<typename _TraitsT>
106    struct _State : _State_base
107    {
108      typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
109
110      _MatcherT      _M_matches;        // for _S_opcode_match
111
112      explicit _State(_Opcode __opcode) : _State_base(__opcode) { }
113    };
114
115  struct _NFA_base
116  {
117    typedef size_t                              _SizeT;
118    typedef regex_constants::syntax_option_type _FlagT;
119
120    explicit
121    _NFA_base(_FlagT __f)
122    : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
123    _M_quant_count(0), _M_has_backref(false)
124    { }
125
126    _NFA_base(_NFA_base&&) = default;
127
128  protected:
129    ~_NFA_base() = default;
130
131  public:
132    _FlagT
133    _M_options() const
134    { return _M_flags; }
135
136    _StateIdT
137    _M_start() const
138    { return _M_start_state; }
139
140    _SizeT
141    _M_sub_count() const
142    { return _M_subexpr_count; }
143
144    std::vector<size_t>       _M_paren_stack;
145    _FlagT                    _M_flags;
146    _StateIdT                 _M_start_state;
147    _SizeT                    _M_subexpr_count;
148    _SizeT                    _M_quant_count;
149    bool                      _M_has_backref;
150  };
151
152  template<typename _TraitsT>
153    struct _NFA
154    : _NFA_base, std::vector<_State<_TraitsT>>
155    {
156      typedef _State<_TraitsT>				_StateT;
157      typedef _Matcher<typename _TraitsT::char_type>	_MatcherT;
158
159      using _NFA_base::_NFA_base;
160
161      // for performance reasons _NFA objects should only be moved not copied
162      _NFA(const _NFA&) = delete;
163      _NFA(_NFA&&) = default;
164
165      _StateIdT
166      _M_insert_accept()
167      {
168	auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
169	return __ret;
170      }
171
172      _StateIdT
173      _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
174      {
175	_StateT __tmp(_S_opcode_alternative);
176	// It labels every quantifier to make greedy comparison easier in BFS
177	// approach.
178	__tmp._M_quant_index = this->_M_quant_count++;
179	__tmp._M_next = __next;
180	__tmp._M_alt = __alt;
181	__tmp._M_neg = __neg;
182	return _M_insert_state(std::move(__tmp));
183      }
184
185      _StateIdT
186      _M_insert_matcher(_MatcherT __m)
187      {
188	_StateT __tmp(_S_opcode_match);
189	__tmp._M_matches = std::move(__m);
190	return _M_insert_state(std::move(__tmp));
191      }
192
193      _StateIdT
194      _M_insert_subexpr_begin()
195      {
196	auto __id = this->_M_subexpr_count++;
197	this->_M_paren_stack.push_back(__id);
198	_StateT __tmp(_S_opcode_subexpr_begin);
199	__tmp._M_subexpr = __id;
200	return _M_insert_state(std::move(__tmp));
201      }
202
203      _StateIdT
204      _M_insert_subexpr_end()
205      {
206	_StateT __tmp(_S_opcode_subexpr_end);
207	__tmp._M_subexpr = this->_M_paren_stack.back();
208	this->_M_paren_stack.pop_back();
209	return _M_insert_state(std::move(__tmp));
210      }
211
212      _StateIdT
213      _M_insert_backref(size_t __index);
214
215      _StateIdT
216      _M_insert_line_begin()
217      { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
218
219      _StateIdT
220      _M_insert_line_end()
221      { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
222
223      _StateIdT
224      _M_insert_word_bound(bool __neg)
225      {
226	_StateT __tmp(_S_opcode_word_boundary);
227	__tmp._M_neg = __neg;
228	return _M_insert_state(std::move(__tmp));
229      }
230
231      _StateIdT
232      _M_insert_lookahead(_StateIdT __alt, bool __neg)
233      {
234	_StateT __tmp(_S_opcode_subexpr_lookahead);
235	__tmp._M_alt = __alt;
236	__tmp._M_neg = __neg;
237	return _M_insert_state(std::move(__tmp));
238      }
239
240      _StateIdT
241      _M_insert_dummy()
242      { return _M_insert_state(_StateT(_S_opcode_dummy)); }
243
244      _StateIdT
245      _M_insert_state(_StateT __s)
246      {
247	this->push_back(std::move(__s));
248	return this->size()-1;
249      }
250
251      // Eliminate dummy node in this NFA to make it compact.
252      void
253      _M_eliminate_dummy();
254
255#ifdef _GLIBCXX_DEBUG
256      std::ostream&
257      _M_dot(std::ostream& __ostr) const;
258#endif
259    };
260
261  /// Describes a sequence of one or more %_State, its current start
262  /// and end(s).  This structure contains fragments of an NFA during
263  /// construction.
264  template<typename _TraitsT>
265    class _StateSeq
266    {
267    public:
268      typedef _NFA<_TraitsT> _RegexT;
269
270    public:
271      _StateSeq(_RegexT& __nfa, _StateIdT __s)
272      : _M_nfa(__nfa), _M_start(__s), _M_end(__s)
273      { }
274
275      _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
276      : _M_nfa(__nfa), _M_start(__s), _M_end(__end)
277      { }
278
279      // Append a state on *this and change *this to the new sequence.
280      void
281      _M_append(_StateIdT __id)
282      {
283	_M_nfa[_M_end]._M_next = __id;
284	_M_end = __id;
285      }
286
287      // Append a sequence on *this and change *this to the new sequence.
288      void
289      _M_append(const _StateSeq& __s)
290      {
291	_M_nfa[_M_end]._M_next = __s._M_start;
292	_M_end = __s._M_end;
293      }
294
295      // Clones an entire sequence.
296      _StateSeq
297      _M_clone();
298
299    public:
300      _RegexT&  _M_nfa;
301      _StateIdT _M_start;
302      _StateIdT _M_end;
303    };
304
305 //@} regex-detail
306_GLIBCXX_END_NAMESPACE_VERSION
307} // namespace __detail
308} // namespace std
309
310#include <bits/regex_automaton.tcc>
311