1// class template regex -*- C++ -*-
2
3// Copyright (C) 2010-2014 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 *  @file bits/regex_compiler.h
27 *  This is an internal header file, included by other library headers.
28 *  Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33namespace __detail
34{
35_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37  /**
38   * @addtogroup regex-detail
39   * @{
40   */
41
42  template<typename, bool, bool>
43    struct _BracketMatcher;
44
45  /**
46   * @brief Builds an NFA from an input iterator interval.
47   *
48   * The %_TraitsT type should fulfill requirements [28.3].
49   */
50  template<typename _TraitsT>
51    class _Compiler
52    {
53    public:
54      typedef typename _TraitsT::char_type        _CharT;
55      typedef const _CharT*                       _IterT;
56      typedef _NFA<_TraitsT>              	  _RegexT;
57      typedef regex_constants::syntax_option_type _FlagT;
58
59      _Compiler(_IterT __b, _IterT __e,
60		const _TraitsT& __traits, _FlagT __flags);
61
62      std::shared_ptr<_RegexT>
63      _M_get_nfa()
64      { return make_shared<_RegexT>(std::move(_M_nfa)); }
65
66    private:
67      typedef _Scanner<_CharT>               _ScannerT;
68      typedef typename _TraitsT::string_type _StringT;
69      typedef typename _ScannerT::_TokenT    _TokenT;
70      typedef _StateSeq<_TraitsT>            _StateSeqT;
71      typedef std::stack<_StateSeqT>         _StackT;
72      typedef std::ctype<_CharT>             _CtypeT;
73
74      // accepts a specific token or returns false.
75      bool
76      _M_match_token(_TokenT __token);
77
78      void
79      _M_disjunction();
80
81      void
82      _M_alternative();
83
84      bool
85      _M_term();
86
87      bool
88      _M_assertion();
89
90      bool
91      _M_quantifier();
92
93      bool
94      _M_atom();
95
96      bool
97      _M_bracket_expression();
98
99      template<bool __icase, bool __collate>
100	void
101	_M_insert_any_matcher_ecma();
102
103      template<bool __icase, bool __collate>
104	void
105	_M_insert_any_matcher_posix();
106
107      template<bool __icase, bool __collate>
108	void
109	_M_insert_char_matcher();
110
111      template<bool __icase, bool __collate>
112	void
113	_M_insert_character_class_matcher();
114
115      template<bool __icase, bool __collate>
116	void
117	_M_insert_bracket_matcher(bool __neg);
118
119      template<bool __icase, bool __collate>
120	void
121	_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
122			   __matcher);
123
124      int
125      _M_cur_int_value(int __radix);
126
127      bool
128      _M_try_char();
129
130      _StateSeqT
131      _M_pop()
132      {
133	auto ret = _M_stack.top();
134	_M_stack.pop();
135	return ret;
136      }
137
138      _FlagT          _M_flags;
139      const _TraitsT& _M_traits;
140      const _CtypeT&  _M_ctype;
141      _ScannerT       _M_scanner;
142      _RegexT         _M_nfa;
143      _StringT        _M_value;
144      _StackT         _M_stack;
145    };
146
147  template<typename _TraitsT>
148    inline std::shared_ptr<_NFA<_TraitsT>>
149    __compile_nfa(const typename _TraitsT::char_type* __first,
150		  const typename _TraitsT::char_type* __last,
151		  const _TraitsT& __traits,
152		  regex_constants::syntax_option_type __flags)
153    {
154      using _Cmplr = _Compiler<_TraitsT>;
155      return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa();
156    }
157
158  // [28.13.14]
159  template<typename _TraitsT, bool __icase, bool __collate>
160    class _RegexTranslator
161    {
162    public:
163      typedef typename _TraitsT::char_type	      _CharT;
164      typedef typename _TraitsT::string_type	      _StringT;
165      typedef typename std::conditional<__collate,
166					_StringT,
167					_CharT>::type _StrTransT;
168
169      explicit
170      _RegexTranslator(const _TraitsT& __traits)
171      : _M_traits(__traits)
172      { }
173
174      _CharT
175      _M_translate(_CharT __ch) const
176      {
177	if (__icase)
178	  return _M_traits.translate_nocase(__ch);
179	else if (__collate)
180	  return _M_traits.translate(__ch);
181	else
182	  return __ch;
183      }
184
185      _StrTransT
186      _M_transform(_CharT __ch) const
187      {
188	return _M_transform_impl(__ch, typename integral_constant<bool,
189				 __collate>::type());
190      }
191
192    private:
193      _StrTransT
194      _M_transform_impl(_CharT __ch, false_type) const
195      { return __ch; }
196
197      _StrTransT
198      _M_transform_impl(_CharT __ch, true_type) const
199      {
200	_StrTransT __str = _StrTransT(1, _M_translate(__ch));
201	return _M_traits.transform(__str.begin(), __str.end());
202      }
203
204      const _TraitsT& _M_traits;
205    };
206
207  template<typename _TraitsT>
208    class _RegexTranslator<_TraitsT, false, false>
209    {
210    public:
211      typedef typename _TraitsT::char_type _CharT;
212      typedef _CharT                       _StrTransT;
213
214      explicit
215      _RegexTranslator(const _TraitsT& __traits)
216      { }
217
218      _CharT
219      _M_translate(_CharT __ch) const
220      { return __ch; }
221
222      _StrTransT
223      _M_transform(_CharT __ch) const
224      { return __ch; }
225    };
226
227  template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
228    struct _AnyMatcher;
229
230  template<typename _TraitsT, bool __icase, bool __collate>
231    struct _AnyMatcher<_TraitsT, false, __icase, __collate>
232    {
233      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
234      typedef typename _TransT::_CharT                       _CharT;
235
236      explicit
237      _AnyMatcher(const _TraitsT& __traits)
238      : _M_translator(__traits)
239      { }
240
241      bool
242      operator()(_CharT __ch) const
243      {
244	static auto __nul = _M_translator._M_translate('\0');
245	return _M_translator._M_translate(__ch) != __nul;
246      }
247
248      _TransT _M_translator;
249    };
250
251  template<typename _TraitsT, bool __icase, bool __collate>
252    struct _AnyMatcher<_TraitsT, true, __icase, __collate>
253    {
254      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
255      typedef typename _TransT::_CharT                       _CharT;
256
257      explicit
258      _AnyMatcher(const _TraitsT& __traits)
259      : _M_translator(__traits)
260      { }
261
262      bool
263      operator()(_CharT __ch) const
264      { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
265
266      bool
267      _M_apply(_CharT __ch, true_type) const
268      {
269	auto __c = _M_translator._M_translate(__ch);
270	auto __n = _M_translator._M_translate('\n');
271	auto __r = _M_translator._M_translate('\r');
272	return __c != __n && __c != __r;
273      }
274
275      bool
276      _M_apply(_CharT __ch, false_type) const
277      {
278	auto __c = _M_translator._M_translate(__ch);
279	auto __n = _M_translator._M_translate('\n');
280	auto __r = _M_translator._M_translate('\r');
281	auto __u2028 = _M_translator._M_translate(u'\u2028');
282	auto __u2029 = _M_translator._M_translate(u'\u2029');
283	return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
284      }
285
286      _TransT _M_translator;
287    };
288
289  template<typename _TraitsT, bool __icase, bool __collate>
290    struct _CharMatcher
291    {
292      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
293      typedef typename _TransT::_CharT                       _CharT;
294
295      _CharMatcher(_CharT __ch, const _TraitsT& __traits)
296      : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
297      { }
298
299      bool
300      operator()(_CharT __ch) const
301      { return _M_ch == _M_translator._M_translate(__ch); }
302
303      _TransT _M_translator;
304      _CharT  _M_ch;
305    };
306
307  /// Matches a character range (bracket expression)
308  template<typename _TraitsT, bool __icase, bool __collate>
309    struct _BracketMatcher
310    {
311    public:
312      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
313      typedef typename _TransT::_CharT                       _CharT;
314      typedef typename _TransT::_StrTransT                   _StrTransT;
315      typedef typename _TraitsT::string_type                 _StringT;
316      typedef typename _TraitsT::char_class_type             _CharClassT;
317
318    public:
319      _BracketMatcher(bool __is_non_matching,
320		      const _TraitsT& __traits)
321      : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
322      _M_is_non_matching(__is_non_matching)
323#ifdef _GLIBCXX_DEBUG
324      , _M_is_ready(false)
325#endif
326      { }
327
328      bool
329      operator()(_CharT __ch) const
330      {
331	_GLIBCXX_DEBUG_ASSERT(_M_is_ready);
332	return _M_apply(__ch, _IsChar());
333      }
334
335      void
336      _M_add_char(_CharT __c)
337      {
338	_M_char_set.push_back(_M_translator._M_translate(__c));
339#ifdef _GLIBCXX_DEBUG
340	_M_is_ready = false;
341#endif
342      }
343
344      void
345      _M_add_collating_element(const _StringT& __s)
346      {
347	auto __st = _M_traits.lookup_collatename(__s.data(),
348						 __s.data() + __s.size());
349	if (__st.empty())
350	  __throw_regex_error(regex_constants::error_collate);
351	_M_char_set.push_back(_M_translator._M_translate(__st[0]));
352#ifdef _GLIBCXX_DEBUG
353	_M_is_ready = false;
354#endif
355      }
356
357      void
358      _M_add_equivalence_class(const _StringT& __s)
359      {
360	auto __st = _M_traits.lookup_collatename(__s.data(),
361						 __s.data() + __s.size());
362	if (__st.empty())
363	  __throw_regex_error(regex_constants::error_collate);
364	__st = _M_traits.transform_primary(__st.data(),
365					   __st.data() + __st.size());
366	_M_equiv_set.push_back(__st);
367#ifdef _GLIBCXX_DEBUG
368	_M_is_ready = false;
369#endif
370      }
371
372      // __neg should be true for \D, \S and \W only.
373      void
374      _M_add_character_class(const _StringT& __s, bool __neg)
375      {
376	auto __mask = _M_traits.lookup_classname(__s.data(),
377						 __s.data() + __s.size(),
378						 __icase);
379	if (__mask == 0)
380	  __throw_regex_error(regex_constants::error_ctype);
381	if (!__neg)
382	  _M_class_set |= __mask;
383	else
384	  _M_neg_class_set.push_back(__mask);
385#ifdef _GLIBCXX_DEBUG
386	_M_is_ready = false;
387#endif
388      }
389
390      void
391      _M_make_range(_CharT __l, _CharT __r)
392      {
393	_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
394					 _M_translator._M_transform(__r)));
395#ifdef _GLIBCXX_DEBUG
396	_M_is_ready = false;
397#endif
398      }
399
400      void
401      _M_ready()
402      {
403	_M_make_cache(_IsChar());
404#ifdef _GLIBCXX_DEBUG
405	_M_is_ready = true;
406#endif
407      }
408
409    private:
410      typedef typename is_same<_CharT, char>::type _IsChar;
411      struct _Dummy { };
412      typedef typename conditional<_IsChar::value,
413				   std::bitset<1 << (8 * sizeof(_CharT))>,
414				   _Dummy>::type _CacheT;
415      typedef typename make_unsigned<_CharT>::type _UnsignedCharT;
416
417    private:
418      bool
419      _M_apply(_CharT __ch, false_type) const;
420
421      bool
422      _M_apply(_CharT __ch, true_type) const
423      { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
424
425      void
426      _M_make_cache(true_type)
427      {
428	for (int __i = 0; __i < _M_cache.size(); __i++)
429	  _M_cache[static_cast<_UnsignedCharT>(__i)] =
430	    _M_apply(__i, false_type());
431      }
432
433      void
434      _M_make_cache(false_type)
435      { }
436
437    private:
438      _CacheT                                   _M_cache;
439      std::vector<_CharT>                       _M_char_set;
440      std::vector<_StringT>                     _M_equiv_set;
441      std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
442      std::vector<_CharClassT>                  _M_neg_class_set;
443      _CharClassT                               _M_class_set;
444      _TransT                                   _M_translator;
445      const _TraitsT&                           _M_traits;
446      bool                                      _M_is_non_matching;
447#ifdef _GLIBCXX_DEBUG
448      bool                                      _M_is_ready;
449#endif
450    };
451
452 //@} regex-detail
453_GLIBCXX_END_NAMESPACE_VERSION
454} // namespace __detail
455} // namespace std
456
457#include <bits/regex_compiler.tcc>
458