1// class template regex -*- C++ -*- 2 3// Copyright (C) 2013-2014 Free Software Foundation, Inc. 4// 5// This file is part of the GNU ISO C++ Library. This library is free 6// software; you can redistribute it and/or modify it under the 7// terms of the GNU General Public License as published by the 8// Free Software Foundation; either version 3, or (at your option) 9// any later version. 10 11// This library is distributed in the hope that it will be useful, 12// but WITHOUT ANY WARRANTY; without even the implied warranty of 13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14// GNU General Public License for more details. 15 16// Under Section 7 of GPL version 3, you are granted additional 17// permissions described in the GCC Runtime Library Exception, version 18// 3.1, as published by the Free Software Foundation. 19 20// You should have received a copy of the GNU General Public License and 21// a copy of the GCC Runtime Library Exception along with this program; 22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23// <http://www.gnu.org/licenses/>. 24 25/** 26 * @file bits/regex_automaton.h 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31namespace std _GLIBCXX_VISIBILITY(default) 32{ 33namespace __detail 34{ 35_GLIBCXX_BEGIN_NAMESPACE_VERSION 36 37 /** 38 * @defgroup regex-detail Base and Implementation Classes 39 * @ingroup regex 40 * @{ 41 */ 42 43 typedef long _StateIdT; 44 static const _StateIdT _S_invalid_state_id = -1; 45 46 template<typename _CharT> 47 using _Matcher = std::function<bool (_CharT)>; 48 49 /// Operation codes that define the type of transitions within the base NFA 50 /// that represents the regular expression. 51 enum _Opcode : int 52 { 53 _S_opcode_unknown, 54 _S_opcode_alternative, 55 _S_opcode_backref, 56 _S_opcode_line_begin_assertion, 57 _S_opcode_line_end_assertion, 58 _S_opcode_word_boundary, 59 _S_opcode_subexpr_lookahead, 60 _S_opcode_subexpr_begin, 61 _S_opcode_subexpr_end, 62 _S_opcode_dummy, 63 _S_opcode_match, 64 _S_opcode_accept, 65 }; 66 67 struct _State_base 68 { 69 _Opcode _M_opcode; // type of outgoing transition 70 _StateIdT _M_next; // outgoing transition 71 union // Since they are mutually exclusive. 72 { 73 size_t _M_subexpr; // for _S_opcode_subexpr_* 74 size_t _M_backref_index; // for _S_opcode_backref 75 struct 76 { 77 // for _S_opcode_alternative. 78 _StateIdT _M_quant_index; 79 // for _S_opcode_alternative or _S_opcode_subexpr_lookahead 80 _StateIdT _M_alt; 81 // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or 82 // quantifiers (ungreedy if set true) 83 bool _M_neg; 84 }; 85 }; 86 87 explicit _State_base(_Opcode __opcode) 88 : _M_opcode(__opcode), _M_next(_S_invalid_state_id) 89 { } 90 91 protected: 92 ~_State_base() = default; 93 94 public: 95#ifdef _GLIBCXX_DEBUG 96 std::ostream& 97 _M_print(std::ostream& ostr) const; 98 99 // Prints graphviz dot commands for state. 100 std::ostream& 101 _M_dot(std::ostream& __ostr, _StateIdT __id) const; 102#endif 103 }; 104 105 template<typename _TraitsT> 106 struct _State : _State_base 107 { 108 typedef _Matcher<typename _TraitsT::char_type> _MatcherT; 109 110 _MatcherT _M_matches; // for _S_opcode_match 111 112 explicit _State(_Opcode __opcode) : _State_base(__opcode) { } 113 }; 114 115 struct _NFA_base 116 { 117 typedef size_t _SizeT; 118 typedef regex_constants::syntax_option_type _FlagT; 119 120 explicit 121 _NFA_base(_FlagT __f) 122 : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0), 123 _M_quant_count(0), _M_has_backref(false) 124 { } 125 126 _NFA_base(_NFA_base&&) = default; 127 128 protected: 129 ~_NFA_base() = default; 130 131 public: 132 _FlagT 133 _M_options() const 134 { return _M_flags; } 135 136 _StateIdT 137 _M_start() const 138 { return _M_start_state; } 139 140 _SizeT 141 _M_sub_count() const 142 { return _M_subexpr_count; } 143 144 std::vector<size_t> _M_paren_stack; 145 _FlagT _M_flags; 146 _StateIdT _M_start_state; 147 _SizeT _M_subexpr_count; 148 _SizeT _M_quant_count; 149 bool _M_has_backref; 150 }; 151 152 template<typename _TraitsT> 153 struct _NFA 154 : _NFA_base, std::vector<_State<_TraitsT>> 155 { 156 typedef _State<_TraitsT> _StateT; 157 typedef _Matcher<typename _TraitsT::char_type> _MatcherT; 158 159 using _NFA_base::_NFA_base; 160 161 // for performance reasons _NFA objects should only be moved not copied 162 _NFA(const _NFA&) = delete; 163 _NFA(_NFA&&) = default; 164 165 _StateIdT 166 _M_insert_accept() 167 { 168 auto __ret = _M_insert_state(_StateT(_S_opcode_accept)); 169 return __ret; 170 } 171 172 _StateIdT 173 _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg) 174 { 175 _StateT __tmp(_S_opcode_alternative); 176 // It labels every quantifier to make greedy comparison easier in BFS 177 // approach. 178 __tmp._M_quant_index = this->_M_quant_count++; 179 __tmp._M_next = __next; 180 __tmp._M_alt = __alt; 181 __tmp._M_neg = __neg; 182 return _M_insert_state(std::move(__tmp)); 183 } 184 185 _StateIdT 186 _M_insert_matcher(_MatcherT __m) 187 { 188 _StateT __tmp(_S_opcode_match); 189 __tmp._M_matches = std::move(__m); 190 return _M_insert_state(std::move(__tmp)); 191 } 192 193 _StateIdT 194 _M_insert_subexpr_begin() 195 { 196 auto __id = this->_M_subexpr_count++; 197 this->_M_paren_stack.push_back(__id); 198 _StateT __tmp(_S_opcode_subexpr_begin); 199 __tmp._M_subexpr = __id; 200 return _M_insert_state(std::move(__tmp)); 201 } 202 203 _StateIdT 204 _M_insert_subexpr_end() 205 { 206 _StateT __tmp(_S_opcode_subexpr_end); 207 __tmp._M_subexpr = this->_M_paren_stack.back(); 208 this->_M_paren_stack.pop_back(); 209 return _M_insert_state(std::move(__tmp)); 210 } 211 212 _StateIdT 213 _M_insert_backref(size_t __index); 214 215 _StateIdT 216 _M_insert_line_begin() 217 { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); } 218 219 _StateIdT 220 _M_insert_line_end() 221 { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); } 222 223 _StateIdT 224 _M_insert_word_bound(bool __neg) 225 { 226 _StateT __tmp(_S_opcode_word_boundary); 227 __tmp._M_neg = __neg; 228 return _M_insert_state(std::move(__tmp)); 229 } 230 231 _StateIdT 232 _M_insert_lookahead(_StateIdT __alt, bool __neg) 233 { 234 _StateT __tmp(_S_opcode_subexpr_lookahead); 235 __tmp._M_alt = __alt; 236 __tmp._M_neg = __neg; 237 return _M_insert_state(std::move(__tmp)); 238 } 239 240 _StateIdT 241 _M_insert_dummy() 242 { return _M_insert_state(_StateT(_S_opcode_dummy)); } 243 244 _StateIdT 245 _M_insert_state(_StateT __s) 246 { 247 this->push_back(std::move(__s)); 248 return this->size()-1; 249 } 250 251 // Eliminate dummy node in this NFA to make it compact. 252 void 253 _M_eliminate_dummy(); 254 255#ifdef _GLIBCXX_DEBUG 256 std::ostream& 257 _M_dot(std::ostream& __ostr) const; 258#endif 259 }; 260 261 /// Describes a sequence of one or more %_State, its current start 262 /// and end(s). This structure contains fragments of an NFA during 263 /// construction. 264 template<typename _TraitsT> 265 class _StateSeq 266 { 267 public: 268 typedef _NFA<_TraitsT> _RegexT; 269 270 public: 271 _StateSeq(_RegexT& __nfa, _StateIdT __s) 272 : _M_nfa(__nfa), _M_start(__s), _M_end(__s) 273 { } 274 275 _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end) 276 : _M_nfa(__nfa), _M_start(__s), _M_end(__end) 277 { } 278 279 // Append a state on *this and change *this to the new sequence. 280 void 281 _M_append(_StateIdT __id) 282 { 283 _M_nfa[_M_end]._M_next = __id; 284 _M_end = __id; 285 } 286 287 // Append a sequence on *this and change *this to the new sequence. 288 void 289 _M_append(const _StateSeq& __s) 290 { 291 _M_nfa[_M_end]._M_next = __s._M_start; 292 _M_end = __s._M_end; 293 } 294 295 // Clones an entire sequence. 296 _StateSeq 297 _M_clone(); 298 299 public: 300 _RegexT& _M_nfa; 301 _StateIdT _M_start; 302 _StateIdT _M_end; 303 }; 304 305 //@} regex-detail 306_GLIBCXX_END_NAMESPACE_VERSION 307} // namespace __detail 308} // namespace std 309 310#include <bits/regex_automaton.tcc> 311