1// class template regex -*- C++ -*-
2
3// Copyright (C) 2010-2013 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 *  @file bits/regex_nfa.h
27 *  This is an internal header file, included by other library headers.
28 *  Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33namespace __detail
34{
35_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37  /**
38   * @addtogroup regex-detail
39   * @{
40   */
41
42  /// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
43  class _Automaton
44  {
45  public:
46    typedef unsigned int _SizeT;
47
48  public:
49    virtual
50    ~_Automaton() { }
51
52    virtual _SizeT
53    _M_sub_count() const = 0;
54
55#ifdef _GLIBCXX_DEBUG
56    virtual std::ostream&
57    _M_dot(std::ostream& __ostr) const = 0;
58#endif
59  };
60
61  /// Generic shared pointer to an automaton.
62  typedef std::shared_ptr<_Automaton> _AutomatonPtr;
63
64  /// Operation codes that define the type of transitions within the base NFA
65  /// that represents the regular expression.
66  enum _Opcode
67  {
68      _S_opcode_unknown       =   0,
69      _S_opcode_alternative   =   1,
70      _S_opcode_subexpr_begin =   4,
71      _S_opcode_subexpr_end   =   5,
72      _S_opcode_match         = 100,
73      _S_opcode_accept        = 255
74  };
75
76  /// Provides a generic facade for a templated match_results.
77  struct _Results
78  {
79    virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
80    virtual void _M_set_matched(int __i, bool __is_matched) = 0;
81  };
82
83  /// Tags current state (for subexpr begin/end).
84  typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
85
86  /// Start state tag.
87  template<typename _FwdIterT, typename _TraitsT>
88    struct _StartTagger
89    {
90      explicit
91      _StartTagger(int __i)
92      : _M_index(__i)
93      { }
94
95      void
96      operator()(const _PatternCursor& __pc, _Results& __r)
97      { __r._M_set_pos(_M_index, 0, __pc); }
98
99      int       _M_index;
100    };
101
102  /// End state tag.
103  template<typename _FwdIterT, typename _TraitsT>
104    struct _EndTagger
105    {
106      explicit
107      _EndTagger(int __i)
108      : _M_index(__i)
109      { }
110
111      void
112      operator()(const _PatternCursor& __pc, _Results& __r)
113      { __r._M_set_pos(_M_index, 1, __pc); }
114
115      int       _M_index;
116      _FwdIterT _M_pos;
117    };
118
119  /// Indicates if current state matches cursor current.
120  typedef std::function<bool (const _PatternCursor&)> _Matcher;
121
122  /// Matches any character
123  inline bool
124  _AnyMatcher(const _PatternCursor&)
125  { return true; }
126
127  /// Matches a single character
128  template<typename _InIterT, typename _TraitsT>
129    struct _CharMatcher
130    {
131      typedef typename _TraitsT::char_type char_type;
132
133      explicit
134      _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
135      : _M_traits(__t), _M_c(_M_traits.translate(__c))
136      { }
137
138      bool
139      operator()(const _PatternCursor& __pc) const
140      {
141	typedef const _SpecializedCursor<_InIterT>& _CursorT;
142	_CursorT __c = static_cast<_CursorT>(__pc);
143	return _M_traits.translate(__c._M_current()) == _M_c;
144      }
145
146      const _TraitsT& _M_traits;
147      char_type       _M_c;
148    };
149
150  /// Matches a character range (bracket expression)
151  template<typename _InIterT, typename _TraitsT>
152    struct _RangeMatcher
153    {
154      typedef typename _TraitsT::char_type _CharT;
155      typedef std::basic_string<_CharT>    _StringT;
156
157      explicit
158      _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
159      : _M_traits(__t), _M_is_non_matching(__is_non_matching)
160      { }
161
162      bool
163      operator()(const _PatternCursor& __pc) const
164      {
165	typedef const _SpecializedCursor<_InIterT>& _CursorT;
166	_CursorT __c = static_cast<_CursorT>(__pc);
167	return true;
168      }
169
170      void
171      _M_add_char(_CharT __c)
172      { }
173
174      void
175      _M_add_collating_element(const _StringT& __s)
176      { }
177
178      void
179      _M_add_equivalence_class(const _StringT& __s)
180      { }
181
182      void
183      _M_add_character_class(const _StringT& __s)
184      { }
185
186      void
187      _M_make_range()
188      { }
189
190      const _TraitsT& _M_traits;
191      bool            _M_is_non_matching;
192    };
193
194  /// Identifies a state in the NFA.
195  typedef int _StateIdT;
196
197  /// The special case in which a state identifier is not an index.
198  static const _StateIdT _S_invalid_state_id  = -1;
199
200
201  /**
202   * @brief struct _State
203   *
204   * An individual state in an NFA
205   *
206   * In this case a "state" is an entry in the NFA definition coupled
207   * with its outgoing transition(s).  All states have a single outgoing
208   * transition, except for accepting states (which have no outgoing
209   * transitions) and alt states, which have two outgoing transitions.
210   */
211  struct _State
212  {
213    typedef int  _OpcodeT;
214
215    _OpcodeT     _M_opcode;    // type of outgoing transition
216    _StateIdT    _M_next;      // outgoing transition
217    _StateIdT    _M_alt;       // for _S_opcode_alternative
218    unsigned int _M_subexpr;   // for _S_opcode_subexpr_*
219    _Tagger      _M_tagger;    // for _S_opcode_subexpr_*
220    _Matcher     _M_matches;   // for _S_opcode_match
221
222    explicit _State(_OpcodeT __opcode)
223    : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
224    { }
225
226    _State(const _Matcher& __m)
227    : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
228    { }
229
230    _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
231    : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
232      _M_tagger(__t)
233    { }
234
235    _State(_StateIdT __next, _StateIdT __alt)
236    : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
237    { }
238
239#ifdef _GLIBCXX_DEBUG
240    std::ostream&
241    _M_print(std::ostream& ostr) const;
242
243    // Prints graphviz dot commands for state.
244    std::ostream&
245    _M_dot(std::ostream& __ostr, _StateIdT __id) const;
246#endif
247  };
248
249
250  /// The Grep Matcher works on sets of states.  Here are sets of states.
251  typedef std::set<_StateIdT> _StateSet;
252
253  /**
254   * @brief struct _Nfa
255   *
256   * A collection of all states making up an NFA.
257   *
258   * An NFA is a 4-tuple M = (K, S, s, F), where
259   *    K is a finite set of states,
260   *    S is the alphabet of the NFA,
261   *    s is the initial state,
262   *    F is a set of final (accepting) states.
263   *
264   * This NFA class is templated on S, a type that will hold values of the
265   * underlying alphabet (without regard to semantics of that alphabet).  The
266   * other elements of the tuple are generated during construction of the NFA
267   * and are available through accessor member functions.
268   */
269  class _Nfa
270  : public _Automaton, public std::vector<_State>
271  {
272  public:
273    typedef _State                              _StateT;
274    typedef unsigned int                        _SizeT;
275    typedef regex_constants::syntax_option_type _FlagT;
276
277    _Nfa(_FlagT __f)
278    : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
279    { }
280
281    ~_Nfa()
282    { }
283
284    _FlagT
285    _M_options() const
286    { return _M_flags; }
287
288    _StateIdT
289    _M_start() const
290    { return _M_start_state; }
291
292    const _StateSet&
293    _M_final_states() const
294    { return _M_accepting_states; }
295
296    _SizeT
297    _M_sub_count() const
298    { return _M_subexpr_count; }
299
300    _StateIdT
301    _M_insert_accept()
302    {
303      this->push_back(_StateT(_S_opcode_accept));
304      _M_accepting_states.insert(this->size()-1);
305      return this->size()-1;
306    }
307
308    _StateIdT
309    _M_insert_alt(_StateIdT __next, _StateIdT __alt)
310    {
311      this->push_back(_StateT(__next, __alt));
312      return this->size()-1;
313    }
314
315    _StateIdT
316    _M_insert_matcher(_Matcher __m)
317    {
318      this->push_back(_StateT(__m));
319      return this->size()-1;
320    }
321
322    _StateIdT
323    _M_insert_subexpr_begin(const _Tagger& __t)
324    {
325      this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++,
326			      __t));
327      return this->size()-1;
328    }
329
330    _StateIdT
331    _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
332    {
333      this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
334      return this->size()-1;
335    }
336
337#ifdef _GLIBCXX_DEBUG
338    std::ostream&
339    _M_dot(std::ostream& __ostr) const;
340#endif
341
342  private:
343    _FlagT     _M_flags;
344    _StateIdT  _M_start_state;
345    _StateSet  _M_accepting_states;
346    _SizeT     _M_subexpr_count;
347  };
348
349  /// Describes a sequence of one or more %_State, its current start
350  /// and end(s).  This structure contains fragments of an NFA during
351  /// construction.
352  class _StateSeq
353  {
354  public:
355    // Constructs a single-node sequence
356    _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
357    : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
358    { }
359    // Constructs a split sequence from two other sequencces
360    _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
361    : _M_nfa(__e1._M_nfa),
362      _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
363      _M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
364    { }
365
366    // Constructs a split sequence from a single sequence
367    _StateSeq(const _StateSeq& __e, _StateIdT __id)
368    : _M_nfa(__e._M_nfa),
369      _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
370      _M_end1(__id), _M_end2(__e._M_end1)
371    { }
372
373    // Constructs a copy of a %_StateSeq
374    _StateSeq(const _StateSeq& __rhs)
375    : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
376      _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
377    { }
378
379
380    _StateSeq& operator=(const _StateSeq& __rhs);
381
382    _StateIdT
383    _M_front() const
384    { return _M_start; }
385
386    // Extends a sequence by one.
387    void
388    _M_push_back(_StateIdT __id);
389
390    // Extends and maybe joins a sequence.
391    void
392    _M_append(_StateIdT __id);
393
394    void
395    _M_append(_StateSeq& __rhs);
396
397    // Clones an entire sequence.
398    _StateIdT
399    _M_clone();
400
401  private:
402    _Nfa&     _M_nfa;
403    _StateIdT _M_start;
404    _StateIdT _M_end1;
405    _StateIdT _M_end2;
406
407  };
408
409 //@} regex-detail
410_GLIBCXX_END_NAMESPACE_VERSION
411} // namespace __detail
412} // namespace std
413
414#include <bits/regex_nfa.tcc>
415
416