1a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* -*- buffer-read-only: t -*- vi: set ro: */ 2a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ 3a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Extended regular expression matching and search library. 4a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009 5a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Free Software Foundation, Inc. 6a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner This file is part of the GNU C Library. 7a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 8a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 9a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner This program is free software; you can redistribute it and/or modify 10a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner it under the terms of the GNU General Public License as published by 11a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner the Free Software Foundation; either version 3, or (at your option) 12a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner any later version. 13a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 14a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner This program is distributed in the hope that it will be useful, 15a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner but WITHOUT ANY WARRANTY; without even the implied warranty of 16a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner GNU General Public License for more details. 
18a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 19a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner You should have received a copy of the GNU General Public License along 20a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner with this program; if not, write to the Free Software Foundation, 21a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 22a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 23a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, 24a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t length, reg_syntax_t syntax); 25a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void re_compile_fastmap_iter (regex_t *bufp, 26a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const re_dfastate_t *init_state, 27a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner char *fastmap); 28a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); 29a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 30a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void free_charset (re_charset_t *cset); 31a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 32a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void free_workarea_compile (regex_t *preg); 33a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t create_initial_state (re_dfa_t *dfa); 34a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 35a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void optimize_utf8 (re_dfa_t *dfa); 36a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 
37a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t analyze (regex_t *preg); 38a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t preorder (bin_tree_t *root, 39a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t (fn (void *, bin_tree_t *)), 40a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner void *extra); 41a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t postorder (bin_tree_t *root, 42a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t (fn (void *, bin_tree_t *)), 43a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner void *extra); 44a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); 45a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); 46a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, 47a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *node); 48a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t calc_first (void *extra, bin_tree_t *node); 49a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t calc_next (void *extra, bin_tree_t *node); 50a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); 51a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint); 52a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node, 53a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned int constraint); 54a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic 
reg_errcode_t calc_eclosure (re_dfa_t *dfa); 55a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, 56a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx node, bool root); 57a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t calc_inveclosure (re_dfa_t *dfa); 58a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic Idx fetch_number (re_string_t *input, re_token_t *token, 59a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax); 60a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic int peek_token (re_token_t *token, re_string_t *input, 61a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax) internal_function; 62a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *parse (re_string_t *regexp, regex_t *preg, 63a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax, reg_errcode_t *err); 64a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, 65a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *token, reg_syntax_t syntax, 66a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx nest, reg_errcode_t *err); 67a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, 68a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *token, reg_syntax_t syntax, 69a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx nest, reg_errcode_t *err); 70a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, 71a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *token, reg_syntax_t syntax, 72a6dfe5f70959a596290e1591579d26a288a1a2f9David 
'Digit' Turner Idx nest, reg_errcode_t *err); 73a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, 74a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *token, reg_syntax_t syntax, 75a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx nest, reg_errcode_t *err); 76a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, 77a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa, re_token_t *token, 78a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax, reg_errcode_t *err); 79a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 80a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *token, reg_syntax_t syntax, 81a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t *err); 82a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t parse_bracket_element (bracket_elem_t *elem, 83a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_t *regexp, 84a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *token, int token_len, 85a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa, 86a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax, 87a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool accept_hyphen); 88a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, 89a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_t *regexp, 90a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *token); 91a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 
92a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t build_equiv_class (bitset_t sbcset, 93a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_charset_t *mbcset, 94a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx *equiv_class_alloc, 95a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *name); 96a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, 97a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_t sbcset, 98a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_charset_t *mbcset, 99a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx *char_class_alloc, 100a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *class_name, 101a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax); 102a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else /* not RE_ENABLE_I18N */ 103a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t build_equiv_class (bitset_t sbcset, 104a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *name); 105a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, 106a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_t sbcset, 107a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *class_name, 108a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax); 109a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not RE_ENABLE_I18N */ 110a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *build_charclass_op (re_dfa_t *dfa, 111a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner RE_TRANSLATE_TYPE trans, 112a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' 
Turner const unsigned char *class_name, 113a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *extra, 114a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool non_match, reg_errcode_t *err); 115a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *create_tree (re_dfa_t *dfa, 116a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *left, bin_tree_t *right, 117a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_type_t type); 118a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *create_token_tree (re_dfa_t *dfa, 119a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *left, bin_tree_t *right, 120a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const re_token_t *token); 121a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); 122a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void free_token (re_token_t *node); 123a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t free_tree (void *extra, bin_tree_t *node); 124a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); 125a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 126a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* This table gives an error message for each of the error codes listed 127a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner in regex.h. Obviously the order here has to be same as there. 128a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner POSIX doesn't require that we do anything for REG_NOERROR, 129a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner but why not be nice? 
*/ 130a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 131a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic const char __re_error_msgid[] = 132a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 133a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_NOERROR_IDX 0 134a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Success") /* REG_NOERROR */ 135a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 136a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") 137a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("No match") /* REG_NOMATCH */ 138a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 139a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") 140a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Invalid regular expression") /* REG_BADPAT */ 141a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 142a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") 143a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ 144a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 145a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") 146a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Invalid character class name") /* REG_ECTYPE */ 147a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 148a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") 149a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' 
Turner gettext_noop ("Trailing backslash") /* REG_EESCAPE */ 150a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 151a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") 152a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Invalid back reference") /* REG_ESUBREG */ 153a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 154a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") 155a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ 156a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 157a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") 158a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ 159a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 160a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") 161a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Unmatched \\{") /* REG_EBRACE */ 162a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 163a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") 164a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ 165a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 166a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") 167a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Invalid range end") /* REG_ERANGE */ 
168a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 169a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") 170a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Memory exhausted") /* REG_ESPACE */ 171a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 172a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") 173a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ 174a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 175a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") 176a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Premature end of regular expression") /* REG_EEND */ 177a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 178a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") 179a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Regular expression too big") /* REG_ESIZE */ 180a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "\0" 181a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") 182a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ 183a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner }; 184a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 185a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic const size_t __re_error_msgid_idx[] = 186a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 
187a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_NOERROR_IDX, 188a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_NOMATCH_IDX, 189a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_BADPAT_IDX, 190a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_ECOLLATE_IDX, 191a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_ECTYPE_IDX, 192a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_EESCAPE_IDX, 193a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_ESUBREG_IDX, 194a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_EBRACK_IDX, 195a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_EPAREN_IDX, 196a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_EBRACE_IDX, 197a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_BADBR_IDX, 198a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_ERANGE_IDX, 199a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_ESPACE_IDX, 200a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_BADRPT_IDX, 201a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_EEND_IDX, 202a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_ESIZE_IDX, 203a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_ERPAREN_IDX 204a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner }; 205a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 206a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Entry points for GNU code. */ 207a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 208a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* re_compile_pattern is the GNU regular expression compiler: it 209a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner compiles PATTERN (of length LENGTH) and puts the result in BUFP. 
210a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Returns 0 if the pattern was valid, otherwise an error string. 211a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 212a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Assumes the `allocated' (and perhaps `buffer') and `translate' fields 213a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner are set in BUFP on entry. */ 214a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 215a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 216a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerconst char * 217a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerre_compile_pattern (pattern, length, bufp) 218a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const char *pattern; 219a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t length; 220a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner struct re_pattern_buffer *bufp; 221a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else /* size_t might promote */ 222a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerconst char * 223a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerre_compile_pattern (const char *pattern, size_t length, 224a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner struct re_pattern_buffer *bufp) 225a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 226a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 227a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t ret; 228a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 229a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* And GNU code determines whether or not to get register information 230a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner by passing null for the REGS argument to re_match, etc., not by 231a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
setting no_sub, unless RE_NO_SUB is set. */ 232a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); 233a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 234a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Match anchors at newline. */ 235a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bufp->newline_anchor = 1; 236a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 237a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = re_compile_internal (bufp, pattern, length, re_syntax_options); 238a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 239a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!ret) 240a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 241a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); 242a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 243a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 244a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerweak_alias (__re_compile_pattern, re_compile_pattern) 245a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 246a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 247a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can 248a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner also be assigned to arbitrarily: each pattern buffer stores its own 249a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner syntax, so it can be changed between regex compilations. */ 250a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* This has no initializer because initialized variables in Emacs 251a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner become read-only after dumping. 
*/ 252a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerreg_syntax_t re_syntax_options; 253a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 254a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 255a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Specify the precise syntax of regexps for compilation. This provides 256a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for compatibility for various utilities which historically have 257a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner different, incompatible syntaxes. 258a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 259a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner The argument SYNTAX is a bit mask comprised of the various bits 260a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner defined in regex.h. We return the old syntax. */ 261a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 262a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerreg_syntax_t 263a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerre_set_syntax (syntax) 264a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax; 265a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 266a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t ret = re_syntax_options; 267a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 268a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_syntax_options = syntax; 269a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return ret; 270a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 271a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 272a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerweak_alias (__re_set_syntax, re_set_syntax) 273a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 274a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
275a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerint 276a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerre_compile_fastmap (bufp) 277a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner struct re_pattern_buffer *bufp; 278a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 279a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; 280a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner char *fastmap = bufp->fastmap; 281a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 282a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner memset (fastmap, '\0', sizeof (char) * SBC_MAX); 283a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); 284a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->init_state != dfa->init_state_word) 285a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); 286a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->init_state != dfa->init_state_nl) 287a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); 288a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->init_state != dfa->init_state_begbuf) 289a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); 290a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bufp->fastmap_accurate = 1; 291a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 0; 292a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 293a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 294a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerweak_alias (__re_compile_fastmap, re_compile_fastmap) 295a6dfe5f70959a596290e1591579d26a288a1a2f9David 
'Digit' Turner#endif 296a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 297a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic inline void 298a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner__attribute ((always_inline)) 299a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerre_set_fastmap (char *fastmap, bool icase, int ch) 300a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 301a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fastmap[ch] = 1; 302a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (icase) 303a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fastmap[tolower (ch)] = 1; 304a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 305a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 306a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Helper function for re_compile_fastmap. 307a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Compile fastmap for the initial_state INIT_STATE. 
*/ 308a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 309a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void 310a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerre_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, 311a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner char *fastmap) 312a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 313a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; 314a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx node_cnt; 315a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); 316a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) 317a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 318a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx node = init_state->nodes.elems[node_cnt]; 319a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_type_t type = dfa->nodes[node].type; 320a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 321a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (type == CHARACTER) 322a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 323a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); 324a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 325a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) 326a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 327a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char buf[MB_LEN_MAX]; 328a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char *p; 329a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' 
Turner wchar_t wc; 330a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbstate_t state; 331a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 332a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner p = buf; 333a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *p++ = dfa->nodes[node].opr.c; 334a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (++node < dfa->nodes_len 335a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && dfa->nodes[node].type == CHARACTER 336a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && dfa->nodes[node].mb_partial) 337a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *p++ = dfa->nodes[node].opr.c; 338a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner memset (&state, '\0', sizeof (state)); 339a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (__mbrtowc (&wc, (const char *) buf, p - buf, 340a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner &state) == p - buf 341a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && (__wcrtomb ((char *) buf, towlower (wc), &state) 342a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner != (size_t) -1)) 343a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_set_fastmap (fastmap, false, buf[0]); 344a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 345a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 346a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 347a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (type == SIMPLE_BRACKET) 348a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 349a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int i, ch; 350a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0, ch = 0; i < BITSET_WORDS; ++i) 351a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 352a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
int j; 353a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; 354a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) 355a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (w & ((bitset_word_t) 1 << j)) 356a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_set_fastmap (fastmap, icase, ch); 357a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 358a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 359a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 360a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (type == COMPLEX_BRACKET) 361a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 362a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_charset_t *cset = dfa->nodes[node].opr.mbcset; 363a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx i; 364a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 365a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# ifdef _LIBC 366a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* See if we have to try all bytes which start multiple collation 367a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elements. 368a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner e.g. In da_DK, we want to catch 'a' since "aa" is a valid 369a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner collation element, and don't catch 'b' since 'b' is 370a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner the only collation element which starts from 'b' (and 371a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner it is caught by SIMPLE_BRACKET). 
*/ 372a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0 373a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && (cset->ncoll_syms || cset->nranges)) 374a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 375a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const int32_t *table = (const int32_t *) 376a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 377a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i < SBC_MAX; ++i) 378a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (table[i] < 0) 379a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_set_fastmap (fastmap, icase, i); 380a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 381a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# endif /* _LIBC */ 382a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 383a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* See if we have to start the match at all multibyte characters, 384a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner i.e. where we would not find an invalid sequence. This only 385a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner applies to multibyte character sets; for single byte character 386a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner sets, the SIMPLE_BRACKET again suffices. 
*/ 387a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->mb_cur_max > 1 388a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && (cset->nchar_classes || cset->non_match 389a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# ifdef _LIBC 390a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || cset->nequiv_classes 391a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# endif /* _LIBC */ 392a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner )) 393a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 394a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char c = 0; 395a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner do 396a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 397a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbstate_t mbs; 398a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner memset (&mbs, 0, sizeof (mbs)); 399a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2) 400a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_set_fastmap (fastmap, false, (int) c); 401a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 402a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (++c != 0); 403a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 404a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 405a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 406a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 407a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* ... Else catch all bytes which can start the mbchars. 
*/ 408a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i < cset->nmbchars; ++i) 409a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 410a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner char buf[256]; 411a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbstate_t state; 412a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner memset (&state, '\0', sizeof (state)); 413a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) 414a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_set_fastmap (fastmap, icase, *(unsigned char *) buf); 415a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) 416a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 417a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) 418a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner != (size_t) -1) 419a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_set_fastmap (fastmap, false, *(unsigned char *) buf); 420a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 421a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 422a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 423a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 424a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 425a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (type == OP_PERIOD 426a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 427a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || type == OP_UTF8_PERIOD 428a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 429a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || type == END_OF_RE) 
430a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 431a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner memset (fastmap, '\1', sizeof (char) * SBC_MAX); 432a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (type == END_OF_RE) 433a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bufp->can_be_null = 1; 434a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return; 435a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 436a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 437a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 438a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 439a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Entry point for POSIX code. */ 440a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* regcomp takes a regular expression as a string and compiles it. 441a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 442a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner PREG is a regex_t *. We do not expect any fields to be initialized, 443a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner since POSIX says we shouldn't. 
Thus, we set 444a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 445a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner `buffer' to the compiled pattern; 446a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner `used' to the length of the compiled pattern; 447a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner `syntax' to RE_SYNTAX_POSIX_EXTENDED if the 448a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner REG_EXTENDED bit in CFLAGS is set; otherwise, to 449a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner RE_SYNTAX_POSIX_BASIC; 450a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner `newline_anchor' to REG_NEWLINE being set in CFLAGS; 451a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner `fastmap' to an allocated space for the fastmap; 452a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner `fastmap_accurate' to zero; 453a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner `re_nsub' to the number of subexpressions in PATTERN. 454a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 455a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner PATTERN is the address of the pattern string. 456a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 457a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner CFLAGS is a series of bits which affect compilation. 458a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 459a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we 460a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner use POSIX basic syntax. 461a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 462a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner If REG_NEWLINE is set, then . and [^...] don't match newline. 463a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Also, regexec will try a match beginning after every newline. 
464a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 465a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner If REG_ICASE is set, then we considers upper- and lowercase 466a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner versions of letters to be equivalent when matching. 467a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 468a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner If REG_NOSUB is set, then when PREG is passed to regexec, that 469a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner routine will report only success or failure, and nothing about the 470a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner registers. 471a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 472a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for 473a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner the return codes and their meanings.) */ 474a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 475a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerint 476a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerregcomp (preg, pattern, cflags) 477a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner regex_t *_Restrict_ preg; 478a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const char *_Restrict_ pattern; 479a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int cflags; 480a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 481a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t ret; 482a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? 
RE_SYNTAX_POSIX_EXTENDED 483a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : RE_SYNTAX_POSIX_BASIC); 484a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 485a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->buffer = NULL; 486a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->allocated = 0; 487a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->used = 0; 488a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 489a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Try to allocate space for the fastmap. */ 490a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->fastmap = re_malloc (char, SBC_MAX); 491a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (preg->fastmap == NULL, 0)) 492a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 493a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 494a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; 495a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 496a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If REG_NEWLINE is set, newlines are treated differently. */ 497a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (cflags & REG_NEWLINE) 498a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 499a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner syntax &= ~RE_DOT_NEWLINE; 500a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner syntax |= RE_HAT_LISTS_NOT_NEWLINE; 501a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* It also changes the matching behavior. 
*/ 502a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->newline_anchor = 1; 503a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 504a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 505a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->newline_anchor = 0; 506a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->no_sub = !!(cflags & REG_NOSUB); 507a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->translate = NULL; 508a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 509a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); 510a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 511a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* POSIX doesn't distinguish between an unmatched open-group and an 512a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unmatched close-group: both are REG_EPAREN. */ 513a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (ret == REG_ERPAREN) 514a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = REG_EPAREN; 515a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 516a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We have already checked preg->fastmap != NULL. */ 517a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (ret == REG_NOERROR, 1)) 518a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Compute the fastmap now, since regexec cannot modify the pattern 519a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner buffer. This function never fails in this implementation. 
*/ 520a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (void) re_compile_fastmap (preg); 521a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 522a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 523a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Some error occurred while compiling the expression. */ 524a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (preg->fastmap); 525a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->fastmap = NULL; 526a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 527a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 528a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return (int) ret; 529a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 530a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 531a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerweak_alias (__regcomp, regcomp) 532a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 533a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 534a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Returns a message corresponding to an error code, ERRCODE, returned 535a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner from either regcomp or regexec. We don't use PREG here. 
*/ 536a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 537a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 538a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnersize_t 539a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerregerror (errcode, preg, errbuf, errbuf_size) 540a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int errcode; 541a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const regex_t *_Restrict_ preg; 542a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner char *_Restrict_ errbuf; 543a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t errbuf_size; 544a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else /* size_t might promote */ 545a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnersize_t 546a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerregerror (int errcode, const regex_t *_Restrict_ preg, 547a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner char *_Restrict_ errbuf, size_t errbuf_size) 548a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 549a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 550a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const char *msg; 551a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t msg_size; 552a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 553a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (errcode < 0 554a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || errcode >= (int) (sizeof (__re_error_msgid_idx) 555a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner / sizeof (__re_error_msgid_idx[0])), 0)) 556a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Only error codes returned by the rest of the code should be passed 557a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner to this routine. 
If we are given anything else, or if other regex 558a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner code generates an invalid error code, then the program has a bug. 559a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Dump core so we can fix it. */ 560a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner abort (); 561a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 562a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); 563a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 564a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner msg_size = strlen (msg) + 1; /* Includes the null. */ 565a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 566a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (errbuf_size != 0, 1)) 567a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 568a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t cpy_size = msg_size; 569a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (msg_size > errbuf_size, 0)) 570a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 571a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner cpy_size = errbuf_size - 1; 572a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner errbuf[cpy_size] = '\0'; 573a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 574a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner memcpy (errbuf, msg, cpy_size); 575a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 576a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 577a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return msg_size; 578a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 579a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 580a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerweak_alias (__regerror, regerror) 
581a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 582a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 583a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 584a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 585a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* This static array is used for the map to single-byte characters when 586a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner UTF-8 is used. Otherwise we would allocate memory just to initialize 587a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner it the same all the time. UTF-8 is the preferred encoding so this is 588a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner a worthwhile optimization. */ 589a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic const bitset_t utf8_sb_map = 590a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 591a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Set the first 128 bits. 
*/ 592a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# if 4 * BITSET_WORD_BITS < ASCII_CHARS 593a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# error "bitset_word_t is narrower than 32 bits" 594a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# elif 3 * BITSET_WORD_BITS < ASCII_CHARS 595a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, 596a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# elif 2 * BITSET_WORD_BITS < ASCII_CHARS 597a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BITSET_WORD_MAX, BITSET_WORD_MAX, 598a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# elif 1 * BITSET_WORD_BITS < ASCII_CHARS 599a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BITSET_WORD_MAX, 600a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# endif 601a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (BITSET_WORD_MAX 602a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner >> (SBC_MAX % BITSET_WORD_BITS == 0 603a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ? 
0 604a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS)) 605a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner}; 606a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 607a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 608a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 609a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void 610a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerfree_dfa_content (re_dfa_t *dfa) 611a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 612a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx i, j; 613a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 614a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->nodes) 615a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i < dfa->nodes_len; ++i) 616a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_token (dfa->nodes + i); 617a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (dfa->nexts); 618a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i < dfa->nodes_len; ++i) 619a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 620a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->eclosures != NULL) 621a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_free (dfa->eclosures + i); 622a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->inveclosures != NULL) 623a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_free (dfa->inveclosures + i); 624a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->edests != NULL) 625a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_free (dfa->edests + i); 626a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 627a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
re_free (dfa->edests); 628a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (dfa->eclosures); 629a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (dfa->inveclosures); 630a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (dfa->nodes); 631a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 632a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->state_table) 633a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i <= dfa->state_hash_mask; ++i) 634a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 635a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner struct re_state_table_entry *entry = dfa->state_table + i; 636a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (j = 0; j < entry->num; ++j) 637a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 638a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfastate_t *state = entry->array[j]; 639a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_state (state); 640a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 641a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (entry->array); 642a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 643a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (dfa->state_table); 644a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 645a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->sb_char != utf8_sb_map) 646a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (dfa->sb_char); 647a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 648a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (dfa->subexp_map); 649a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef DEBUG 650a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (dfa->re_str); 
651a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 652a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 653a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (dfa); 654a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 655a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 656a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 657a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Free dynamically allocated space used by PREG. */ 658a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 659a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnervoid 660a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerregfree (preg) 661a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner regex_t *preg; 662a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 663a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) preg->buffer; 664a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (dfa != NULL, 1)) 665a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_dfa_content (dfa); 666a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->buffer = NULL; 667a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->allocated = 0; 668a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 669a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (preg->fastmap); 670a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->fastmap = NULL; 671a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 672a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (preg->translate); 673a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->translate = NULL; 674a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 675a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 
676a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerweak_alias (__regfree, regfree) 677a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 678a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 679a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Entry points compatible with 4.2 BSD regex library. We don't define 680a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner them unless specifically requested. */ 681a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 682a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#if defined _REGEX_RE_COMP || defined _LIBC 683a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 684a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* BSD has one and only one pattern buffer. */ 685a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic struct re_pattern_buffer re_comp_buf; 686a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 687a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerchar * 688a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# ifdef _LIBC 689a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Make these definitions weak in libc, so POSIX programs can redefine 690a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner these names if they don't use our functions, and still use 691a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner regcomp/regexec above without link errors. 
*/ 692a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerweak_function 693a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# endif 694a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerre_comp (s) 695a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const char *s; 696a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 697a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t ret; 698a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner char *fastmap; 699a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 700a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!s) 701a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 702a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!re_comp_buf.buffer) 703a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return gettext ("No previous regular expression"); 704a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 0; 705a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 706a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 707a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (re_comp_buf.buffer) 708a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 709a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fastmap = re_comp_buf.fastmap; 710a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_comp_buf.fastmap = NULL; 711a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner __regfree (&re_comp_buf); 712a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); 713a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_comp_buf.fastmap = fastmap; 714a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 715a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 716a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if 
(re_comp_buf.fastmap == NULL) 717a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 718a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_comp_buf.fastmap = (char *) malloc (SBC_MAX); 719a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (re_comp_buf.fastmap == NULL) 720a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return (char *) gettext (__re_error_msgid 721a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner + __re_error_msgid_idx[(int) REG_ESPACE]); 722a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 723a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 724a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Since `re_exec' always passes NULL for the `regs' argument, we 725a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner don't need to initialize the pattern buffer fields which affect it. */ 726a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 727a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Match anchors at newlines. */ 728a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_comp_buf.newline_anchor = 1; 729a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 730a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); 731a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 732a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!ret) 733a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 734a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 735a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Yes, we're discarding `const' here if !HAVE_LIBINTL. 
*/ 736a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); 737a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 738a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 739a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 740a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerlibc_freeres_fn (free_mem) 741a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 742a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner __regfree (&re_comp_buf); 743a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 744a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 745a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 746a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* _REGEX_RE_COMP */ 747a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 748a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Internal entry point. 749a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Compile the regular expression PATTERN, whose length is LENGTH. 750a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner SYNTAX indicate regular expression's syntax. 
*/ 751a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 752a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 753a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerre_compile_internal (regex_t *preg, const char * pattern, size_t length, 754a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax) 755a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 756a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t err = REG_NOERROR; 757a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa; 758a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_t regexp; 759a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 760a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Initialize the pattern buffer. */ 761a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->fastmap_accurate = 0; 762a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->syntax = syntax; 763a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->not_bol = preg->not_eol = 0; 764a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->used = 0; 765a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->re_nsub = 0; 766a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->can_be_null = 0; 767a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->regs_allocated = REGS_UNALLOCATED; 768a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 769a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Initialize the dfa. 
*/ 770a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa = (re_dfa_t *) preg->buffer; 771a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (preg->allocated < sizeof (re_dfa_t), 0)) 772a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 773a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If zero allocated, but buffer is non-null, try to realloc 774a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner enough space. This loses if buffer's address is bogus, but 775a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner that is the user's responsibility. If ->buffer is NULL this 776a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner is a simple allocation. */ 777a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa = re_realloc (preg->buffer, re_dfa_t, 1); 778a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa == NULL) 779a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 780a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->allocated = sizeof (re_dfa_t); 781a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->buffer = (unsigned char *) dfa; 782a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 783a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->used = sizeof (re_dfa_t); 784a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 785a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = init_dfa (dfa, length); 786a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 787a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 788a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_dfa_content (dfa); 789a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->buffer = NULL; 790a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->allocated = 0; 
791a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 792a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 793a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef DEBUG 794a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Note: length+1 will not overflow since it is checked in init_dfa. */ 795a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->re_str = re_malloc (char, length + 1); 796a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner strncpy (dfa->re_str, pattern, length + 1); 797a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 798a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 799a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner __libc_lock_init (dfa->lock); 800a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 801a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = re_string_construct (®exp, pattern, length, preg->translate, 802a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (syntax & RE_ICASE) != 0, dfa); 803a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 804a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 805a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_compile_internal_free_return: 806a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_workarea_compile (preg); 807a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_destruct (®exp); 808a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_dfa_content (dfa); 809a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->buffer = NULL; 810a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->allocated = 0; 811a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 812a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 813a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
814a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Parse the regular expression, and build a structure tree. */ 815a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->re_nsub = 0; 816a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->str_tree = parse (®exp, preg, syntax, &err); 817a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (dfa->str_tree == NULL, 0)) 818a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto re_compile_internal_free_return; 819a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 820a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Analyze the tree and create the nfa. */ 821a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = analyze (preg); 822a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 823a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto re_compile_internal_free_return; 824a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 825a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 826a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If possible, do searching in single byte encoding to speed things up. */ 827a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) 828a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner optimize_utf8 (dfa); 829a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 830a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 831a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Then create the initial state of the dfa. */ 832a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = create_initial_state (dfa); 833a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 834a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Release work areas. 
*/ 835a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_workarea_compile (preg); 836a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_destruct (®exp); 837a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 838a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 839a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 840a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_dfa_content (dfa); 841a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->buffer = NULL; 842a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preg->allocated = 0; 843a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 844a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 845a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 846a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 847a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 848a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Initialize DFA. We use the length of the regular expression PAT_LEN 849a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner as the initial length of some arrays. 
*/ 850a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 851a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 852a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerinit_dfa (re_dfa_t *dfa, size_t pat_len) 853a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 854a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner __re_size_t table_size; 855a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 856a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); 857a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else 858a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t max_i18n_object_size = 0; 859a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 860a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t max_object_size = 861a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner MAX (sizeof (struct re_state_table_entry), 862a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner MAX (sizeof (re_token_t), 863a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner MAX (sizeof (re_node_set), 864a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner MAX (sizeof (regmatch_t), 865a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner max_i18n_object_size)))); 866a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 867a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner memset (dfa, '\0', sizeof (re_dfa_t)); 868a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 869a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Force allocation of str_tree_storage the first time. 
*/ 870a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; 871a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 872a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Avoid overflows. The extra "/ 2" is for the table_size doubling 873a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner calculation below, and for similar doubling calculations 874a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elsewhere. And it's <= rather than <, because some of the 875a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner doubling calculations add 1 afterwards. */ 876a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (SIZE_MAX / max_object_size / 2 <= pat_len, 0)) 877a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 878a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 879a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nodes_alloc = pat_len + 1; 880a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); 881a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 882a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* table_size = 2 ^ ceil(log pat_len) */ 883a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (table_size = 1; ; table_size <<= 1) 884a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (table_size > pat_len) 885a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 886a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 887a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); 888a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->state_hash_mask = table_size - 1; 889a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
890a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->mb_cur_max = MB_CUR_MAX; 891a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 892a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->mb_cur_max == 6 893a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) 894a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->is_utf8 = 1; 895a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) 896a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner != 0); 897a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else 898a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (strcmp (locale_charset (), "UTF-8") == 0) 899a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->is_utf8 = 1; 900a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 901a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We check exhaustively in the loop below if this charset is a 902a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner superset of ASCII. 
*/ 903a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->map_notascii = 0; 904a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 905a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 906a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 907a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->mb_cur_max > 1) 908a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 909a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->is_utf8) 910a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; 911a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 912a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 913a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int i, j, ch; 914a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 915a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 916a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (dfa->sb_char == NULL, 0)) 917a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 918a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 919a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Set the bits corresponding to single byte chars. 
*/ 920a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0, ch = 0; i < BITSET_WORDS; ++i) 921a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) 922a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 923a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wint_t wch = __btowc (ch); 924a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (wch != WEOF) 925a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->sb_char[i] |= (bitset_word_t) 1 << j; 926a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# ifndef _LIBC 927a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (isascii (ch) && wch != ch) 928a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->map_notascii = 1; 929a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# endif 930a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 931a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 932a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 933a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 934a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 935a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) 936a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 937a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 938a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 939a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 940a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Initialize WORD_CHAR table, which indicate which character is 941a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "word". 
In this case "word" means that it is the word construction 942a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner character used by some operators like "\<", "\>", etc. */ 943a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 944a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void 945a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerinternal_function 946a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerinit_word_char (re_dfa_t *dfa) 947a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 948a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int i, j, ch; 949a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->word_ops_used = 1; 950a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0, ch = 0; i < BITSET_WORDS; ++i) 951a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) 952a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (isalnum (ch) || ch == '_') 953a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->word_char[i] |= (bitset_word_t) 1 << j; 954a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 955a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 956a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Free the work area which are only used while compiling. 
*/ 957a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 958a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void 959a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerfree_workarea_compile (regex_t *preg) 960a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 961a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) preg->buffer; 962a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_storage_t *storage, *next; 963a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (storage = dfa->str_tree_storage; storage; storage = next) 964a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 965a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner next = storage->next; 966a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (storage); 967a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 968a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->str_tree_storage = NULL; 969a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; 970a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->str_tree = NULL; 971a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (dfa->org_indices); 972a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->org_indices = NULL; 973a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 974a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 975a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Create initial states for all contexts. 
*/ 976a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 977a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 978a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnercreate_initial_state (re_dfa_t *dfa) 979a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 980a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx first, i; 981a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t err; 982a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set init_nodes; 983a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 984a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Initial states have the epsilon closure of the node which is 985a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner the first node of the regular expression. */ 986a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner first = dfa->str_tree->first->node_idx; 987a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->init_node = first; 988a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); 989a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 990a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 991a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 992a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* The back-references which are in initial states can epsilon transit, 993a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner since in this case all of the subexpressions can be null. 994a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Then we add epsilon closures of the nodes which are the next nodes of 995a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner the back-references. 
*/ 996a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->nbackref > 0) 997a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i < init_nodes.nelem; ++i) 998a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 999a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx node_idx = init_nodes.elems[i]; 1000a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_type_t type = dfa->nodes[node_idx].type; 1001a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1002a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx clexp_idx; 1003a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (type != OP_BACK_REF) 1004a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner continue; 1005a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) 1006a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1007a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *clexp_node; 1008a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; 1009a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (clexp_node->type == OP_CLOSE_SUBEXP 1010a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) 1011a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1012a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1013a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (clexp_idx == init_nodes.nelem) 1014a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner continue; 1015a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1016a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (type == OP_BACK_REF) 1017a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1018a6dfe5f70959a596290e1591579d26a288a1a2f9David 
'Digit' Turner Idx dest_idx = dfa->edests[node_idx].elems[0]; 1019a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!re_node_set_contains (&init_nodes, dest_idx)) 1020a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1021a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); 1022a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner i = 0; 1023a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1024a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1025a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1026a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1027a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* It must be the first time to invoke acquire_state. */ 1028a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); 1029a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We don't check ERR here, since the initial state must not be NULL. 
*/ 1030a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (dfa->init_state == NULL, 0)) 1031a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1032a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->init_state->has_constraint) 1033a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1034a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, 1035a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner CONTEXT_WORD); 1036a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, 1037a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner CONTEXT_NEWLINE); 1038a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, 1039a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner &init_nodes, 1040a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner CONTEXT_NEWLINE 1041a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner | CONTEXT_BEGBUF); 1042a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL 1043a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || dfa->init_state_begbuf == NULL, 0)) 1044a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1045a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1046a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 1047a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->init_state_word = dfa->init_state_nl 1048a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner = dfa->init_state_begbuf = dfa->init_state; 1049a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1050a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_free (&init_nodes); 
1051a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 1052a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1053a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1054a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 1055a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* If it is possible to do searching in single byte encoding instead of UTF-8 1056a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change 1057a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner DFA nodes where needed. */ 1058a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1059a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void 1060a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turneroptimize_utf8 (re_dfa_t *dfa) 1061a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1062a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx node; 1063a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int i; 1064a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool mb_chars = false; 1065a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool has_period = false; 1066a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1067a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (node = 0; node < dfa->nodes_len; ++node) 1068a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (dfa->nodes[node].type) 1069a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1070a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case CHARACTER: 1071a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->nodes[node].opr.c >= ASCII_CHARS) 1072a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mb_chars = true; 1073a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 
1074a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case ANCHOR: 1075a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (dfa->nodes[node].opr.ctx_type) 1076a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1077a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case LINE_FIRST: 1078a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case LINE_LAST: 1079a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case BUF_FIRST: 1080a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case BUF_LAST: 1081a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1082a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 1083a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Word anchors etc. cannot be handled. It's okay to test 1084a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner opr.ctx_type since constraints (for all DFA nodes) are 1085a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner created by ORing one or more opr.ctx_type values. 
*/ 1086a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return; 1087a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1088a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1089a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_PERIOD: 1090a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner has_period = true; 1091a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1092a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_BACK_REF: 1093a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_ALT: 1094a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case END_OF_RE: 1095a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_DUP_ASTERISK: 1096a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_OPEN_SUBEXP: 1097a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_CLOSE_SUBEXP: 1098a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1099a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case COMPLEX_BRACKET: 1100a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return; 1101a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case SIMPLE_BRACKET: 1102a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Just double check. */ 1103a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1104a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0 1105a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ? 
0 1106a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS); 1107a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) 1108a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1109a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0) 1110a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return; 1111a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner rshift = 0; 1112a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1113a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1114a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1115a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 1116a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner abort (); 1117a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1118a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1119a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (mb_chars || has_period) 1120a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (node = 0; node < dfa->nodes_len; ++node) 1121a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1122a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->nodes[node].type == CHARACTER 1123a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && dfa->nodes[node].opr.c >= ASCII_CHARS) 1124a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nodes[node].mb_partial = 0; 1125a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (dfa->nodes[node].type == OP_PERIOD) 1126a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nodes[node].type = OP_UTF8_PERIOD; 1127a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1128a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
1129a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* The search can be in single byte locale. */ 1130a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->mb_cur_max = 1; 1131a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->is_utf8 = 0; 1132a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->has_mb_node = dfa->nbackref > 0 || has_period; 1133a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1134a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 1135a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1136a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Analyze the structure tree, and calculate "first", "next", "edest", 1137a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "eclosure", and "inveclosure". */ 1138a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1139a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1140a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turneranalyze (regex_t *preg) 1141a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1142a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) preg->buffer; 1143a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t ret; 1144a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1145a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Allocate arrays. 
*/ 1146a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nexts = re_malloc (Idx, dfa->nodes_alloc); 1147a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc); 1148a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); 1149a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); 1150a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL 1151a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || dfa->eclosures == NULL, 0)) 1152a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1153a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1154a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->subexp_map = re_malloc (Idx, preg->re_nsub); 1155a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->subexp_map != NULL) 1156a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1157a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx i; 1158a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i < preg->re_nsub; i++) 1159a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->subexp_map[i] = i; 1160a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preorder (dfa->str_tree, optimize_subexps, dfa); 1161a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i < preg->re_nsub; i++) 1162a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->subexp_map[i] != i) 1163a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1164a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (i == preg->re_nsub) 1165a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1166a6dfe5f70959a596290e1591579d26a288a1a2f9David 
'Digit' Turner free (dfa->subexp_map); 1167a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->subexp_map = NULL; 1168a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1169a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1170a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1171a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = postorder (dfa->str_tree, lower_subexps, preg); 1172a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (ret != REG_NOERROR, 0)) 1173a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return ret; 1174a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = postorder (dfa->str_tree, calc_first, dfa); 1175a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (ret != REG_NOERROR, 0)) 1176a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return ret; 1177a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner preorder (dfa->str_tree, calc_next, dfa); 1178a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); 1179a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (ret != REG_NOERROR, 0)) 1180a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return ret; 1181a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = calc_eclosure (dfa); 1182a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (ret != REG_NOERROR, 0)) 1183a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return ret; 1184a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1185a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We only need this during the prune_impossible_nodes pass in regexec.c; 1186a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner skip it if p_i_n will not run, as calc_inveclosure can be quadratic. 
*/ 1187a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) 1188a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || dfa->nbackref) 1189a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1190a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); 1191a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (dfa->inveclosures == NULL, 0)) 1192a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1193a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = calc_inveclosure (dfa); 1194a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1195a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1196a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return ret; 1197a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1198a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1199a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Our parse trees are very unbalanced, so we cannot use a stack to 1200a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner implement parse tree visits. Instead, we use parent pointers and 1201a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner some hairy code in these two functions. 
*/ 1202a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1203a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerpostorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), 1204a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner void *extra) 1205a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1206a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *node, *prev; 1207a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1208a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (node = root; ; ) 1209a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1210a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Descend down the tree, preferably to the left (or to the right 1211a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if that's the only child). */ 1212a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (node->left || node->right) 1213a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->left) 1214a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node = node->left; 1215a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 1216a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node = node->right; 1217a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1218a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner do 1219a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1220a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t err = fn (extra, node); 1221a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 1222a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1223a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->parent == NULL) 1224a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 
1225a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner prev = node; 1226a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node = node->parent; 1227a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1228a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Go up while we have a node that is reached from the right. */ 1229a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (node->right == prev || node->right == NULL); 1230a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node = node->right; 1231a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1232a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1233a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1234a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1235a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerpreorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), 1236a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner void *extra) 1237a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1238a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *node; 1239a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1240a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (node = root; ; ) 1241a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1242a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t err = fn (extra, node); 1243a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 1244a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1245a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1246a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Go to the left node, or up and to the right. 
*/ 1247a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->left) 1248a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node = node->left; 1249a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 1250a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1251a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *prev = NULL; 1252a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (node->right == prev || node->right == NULL) 1253a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1254a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner prev = node; 1255a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node = node->parent; 1256a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!node) 1257a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 1258a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1259a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node = node->right; 1260a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1261a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1262a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1263a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1264a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell 1265a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_search_internal to map the inner one's opr.idx to this one's. Adjust 1266a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner backreferences as well. Requires a preorder visit. 
*/ 1267a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1268a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turneroptimize_subexps (void *extra, bin_tree_t *node) 1269a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1270a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) extra; 1271a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1272a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->token.type == OP_BACK_REF && dfa->subexp_map) 1273a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1274a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int idx = node->token.opr.idx; 1275a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->token.opr.idx = dfa->subexp_map[idx]; 1276a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->used_bkref_map |= 1 << node->token.opr.idx; 1277a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1278a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1279a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (node->token.type == SUBEXP 1280a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && node->left && node->left->token.type == SUBEXP) 1281a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1282a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx other_idx = node->left->token.opr.idx; 1283a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1284a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->left = node->left->left; 1285a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->left) 1286a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->left->parent = node; 1287a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1288a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; 
1289a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (other_idx < BITSET_WORD_BITS) 1290a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); 1291a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1292a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1293a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 1294a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1295a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1296a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation 1297a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */ 1298a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1299a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerlower_subexps (void *extra, bin_tree_t *node) 1300a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1301a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner regex_t *preg = (regex_t *) extra; 1302a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t err = REG_NOERROR; 1303a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1304a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->left && node->left->token.type == SUBEXP) 1305a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1306a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->left = lower_subexp (&err, preg, node->left); 1307a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->left) 1308a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->left->parent = node; 1309a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1310a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->right && 
node->right->token.type == SUBEXP) 1311a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1312a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->right = lower_subexp (&err, preg, node->right); 1313a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->right) 1314a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->right->parent = node; 1315a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1316a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1317a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1318a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1319a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1320a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 1321a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerlower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) 1322a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1323a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) preg->buffer; 1324a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *body = node->left; 1325a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *op, *cls, *tree1, *tree; 1326a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1327a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (preg->no_sub 1328a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We do not optimize empty subexpressions, because otherwise we may 1329a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner have bad CONCAT nodes with NULL children. This is obviously not 1330a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner very common, so we do not lose much. An example that triggers 1331a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner this case is the sed "script" /\(\)/x. 
*/ 1332a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && node->left != NULL 1333a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && (node->token.opr.idx >= BITSET_WORD_BITS 1334a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || !(dfa->used_bkref_map 1335a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner & ((bitset_word_t) 1 << node->token.opr.idx)))) 1336a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return node->left; 1337a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1338a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Convert the SUBEXP node to the concatenation of an 1339a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ 1340a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); 1341a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); 1342a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree1 = body ? 
create_tree (dfa, body, cls, CONCAT) : cls; 1343a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, op, tree1, CONCAT); 1344a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) 1345a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1346a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 1347a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 1348a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1349a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1350a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; 1351a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; 1352a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 1353a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1354a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1355a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton 1356a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner nodes. Requires a postorder visit. 
*/ 1357a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1358a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnercalc_first (void *extra, bin_tree_t *node) 1359a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1360a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) extra; 1361a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->token.type == CONCAT) 1362a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1363a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->first = node->left->first; 1364a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->node_idx = node->left->node_idx; 1365a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1366a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 1367a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1368a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->first = node; 1369a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->node_idx = re_dfa_add_node (dfa, node->token); 1370a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (node->node_idx == REG_MISSING, 0)) 1371a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1372a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->token.type == ANCHOR) 1373a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; 1374a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1375a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 1376a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1377a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1378a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Pass 2: compute NEXT on the tree. Preorder visit. 
*/ 1379a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1380a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnercalc_next (void *extra, bin_tree_t *node) 1381a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1382a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (node->token.type) 1383a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1384a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_DUP_ASTERISK: 1385a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->left->next = node; 1386a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1387a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case CONCAT: 1388a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->left->next = node->right->first; 1389a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->right->next = node->next; 1390a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1391a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 1392a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->left) 1393a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->left->next = node->next; 1394a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->right) 1395a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->right->next = node->next; 1396a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1397a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1398a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 1399a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1400a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1401a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Pass 3: link all DFA nodes to their NEXT node (any order will do). 
*/ 1402a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1403a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerlink_nfa_nodes (void *extra, bin_tree_t *node) 1404a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1405a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) extra; 1406a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx idx = node->node_idx; 1407a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t err = REG_NOERROR; 1408a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1409a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (node->token.type) 1410a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1411a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case CONCAT: 1412a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1413a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1414a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case END_OF_RE: 1415a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner assert (node->next == NULL); 1416a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1417a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1418a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_DUP_ASTERISK: 1419a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_ALT: 1420a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1421a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx left, right; 1422a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->has_plural_match = 1; 1423a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->left != NULL) 1424a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner left = node->left->first->node_idx; 1425a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 
1426a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner left = node->next->node_idx; 1427a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->right != NULL) 1428a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner right = node->right->first->node_idx; 1429a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 1430a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner right = node->next->node_idx; 1431a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner assert (REG_VALID_INDEX (left)); 1432a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner assert (REG_VALID_INDEX (right)); 1433a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = re_node_set_init_2 (dfa->edests + idx, left, right); 1434a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1435a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1436a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1437a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case ANCHOR: 1438a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_OPEN_SUBEXP: 1439a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_CLOSE_SUBEXP: 1440a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); 1441a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1442a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1443a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_BACK_REF: 1444a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nexts[idx] = node->next->node_idx; 1445a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->token.type == OP_BACK_REF) 1446a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); 1447a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 
1448a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1449a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 1450a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner assert (!IS_EPSILON_NODE (node->token.type)); 1451a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nexts[idx] = node->next->node_idx; 1452a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1453a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1454a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1455a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1456a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1457a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1458a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Duplicate the epsilon closure of the node ROOT_NODE. 1459a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Note that duplicated nodes have constraint INIT_CONSTRAINT in addition 1460a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner to their own constraint. 
*/ 1461a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1462a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1463a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerinternal_function 1464a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerduplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, 1465a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx root_node, unsigned int init_constraint) 1466a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1467a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx org_node, clone_node; 1468a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool ok; 1469a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned int constraint = init_constraint; 1470a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (org_node = top_org_node, clone_node = top_clone_node;;) 1471a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1472a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx org_dest, clone_dest; 1473a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->nodes[org_node].type == OP_BACK_REF) 1474a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1475a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If the back reference epsilon-transit, its destination must 1476a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner also have the constraint. Then duplicate the epsilon closure 1477a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner of the destination of the back reference, and store it in 1478a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner edests of the back reference. 
*/ 1479a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner org_dest = dfa->nexts[org_node]; 1480a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_empty (dfa->edests + clone_node); 1481a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner clone_dest = duplicate_node (dfa, org_dest, constraint); 1482a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (clone_dest == REG_MISSING, 0)) 1483a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1484a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nexts[clone_node] = dfa->nexts[org_node]; 1485a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1486a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (! ok, 0)) 1487a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1488a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1489a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (dfa->edests[org_node].nelem == 0) 1490a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1491a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* In case of the node can't epsilon-transit, don't duplicate the 1492a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner destination and store the original destination as the 1493a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner destination of the node. 
*/ 1494a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nexts[clone_node] = dfa->nexts[org_node]; 1495a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1496a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1497a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (dfa->edests[org_node].nelem == 1) 1498a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1499a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* In case of the node can epsilon-transit, and it has only one 1500a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner destination. */ 1501a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner org_dest = dfa->edests[org_node].elems[0]; 1502a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_empty (dfa->edests + clone_node); 1503a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner clone_dest = search_duplicated_node (dfa, org_dest, constraint); 1504a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If the node is root_node itself, it means the epsilon closure 1505a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner has a loop. Then tie it to the destination of the root_node. */ 1506a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (org_node == root_node && clone_node != org_node) 1507a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1508a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ok = re_node_set_insert (dfa->edests + clone_node, org_dest); 1509a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (! ok, 0)) 1510a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1511a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1512a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1513a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* In case the node has another constraint, append it. 
*/ 1514a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner constraint |= dfa->nodes[org_node].constraint; 1515a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner clone_dest = duplicate_node (dfa, org_dest, constraint); 1516a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (clone_dest == REG_MISSING, 0)) 1517a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1518a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1519a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (! ok, 0)) 1520a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1521a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1522a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else /* dfa->edests[org_node].nelem == 2 */ 1523a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1524a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* In case of the node can epsilon-transit, and it has two 1525a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ 1526a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner org_dest = dfa->edests[org_node].elems[0]; 1527a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_empty (dfa->edests + clone_node); 1528a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Search for a duplicated node which satisfies the constraint. */ 1529a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner clone_dest = search_duplicated_node (dfa, org_dest, constraint); 1530a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (clone_dest == REG_MISSING) 1531a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1532a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* There is no such duplicated node, create a new one. 
*/ 1533a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t err; 1534a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner clone_dest = duplicate_node (dfa, org_dest, constraint); 1535a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (clone_dest == REG_MISSING, 0)) 1536a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1537a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1538a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (! ok, 0)) 1539a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1540a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = duplicate_node_closure (dfa, org_dest, clone_dest, 1541a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner root_node, constraint); 1542a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 1543a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1544a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1545a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 1546a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1547a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* There is a duplicated node which satisfy the constraint, 1548a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner use it to avoid infinite loop. */ 1549a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1550a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (! 
ok, 0)) 1551a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1552a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1553a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1554a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner org_dest = dfa->edests[org_node].elems[1]; 1555a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner clone_dest = duplicate_node (dfa, org_dest, constraint); 1556a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (clone_dest == REG_MISSING, 0)) 1557a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1558a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1559a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (! ok, 0)) 1560a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1561a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1562a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner org_node = org_dest; 1563a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner clone_node = clone_dest; 1564a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1565a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 1566a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1567a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1568a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Search for a node which is duplicated from the node ORG_NODE, and 1569a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner satisfies the constraint CONSTRAINT. 
*/ 1570a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1571a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic Idx 1572a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnersearch_duplicated_node (const re_dfa_t *dfa, Idx org_node, 1573a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned int constraint) 1574a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1575a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx idx; 1576a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) 1577a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1578a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (org_node == dfa->org_indices[idx] 1579a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && constraint == dfa->nodes[idx].constraint) 1580a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return idx; /* Found. */ 1581a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1582a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_MISSING; /* Not found. */ 1583a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1584a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1585a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. 1586a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Return the index of the new node, or REG_MISSING if insufficient storage is 1587a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner available. 
*/ 1588a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1589a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic Idx 1590a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerduplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint) 1591a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1592a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); 1593a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (dup_idx != REG_MISSING, 1)) 1594a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1595a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nodes[dup_idx].constraint = constraint; 1596a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; 1597a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nodes[dup_idx].duplicated = 1; 1598a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1599a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Store the index of the original node. 
*/ 1600a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->org_indices[dup_idx] = org_idx; 1601a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1602a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return dup_idx; 1603a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1604a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1605a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1606a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnercalc_inveclosure (re_dfa_t *dfa) 1607a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1608a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx src, idx; 1609a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool ok; 1610a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (idx = 0; idx < dfa->nodes_len; ++idx) 1611a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_init_empty (dfa->inveclosures + idx); 1612a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1613a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (src = 0; src < dfa->nodes_len; ++src) 1614a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1615a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx *elems = dfa->eclosures[src].elems; 1616a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) 1617a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1618a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); 1619a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (! 
ok, 0)) 1620a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1621a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1622a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1623a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1624a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 1625a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1626a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1627a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Calculate "eclosure" for all the node in DFA. */ 1628a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1629a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1630a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnercalc_eclosure (re_dfa_t *dfa) 1631a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1632a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx node_idx; 1633a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool incomplete; 1634a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef DEBUG 1635a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner assert (dfa->nodes_len > 0); 1636a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 1637a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner incomplete = false; 1638a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* For each nodes, calculate epsilon closure. 
*/ 1639a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (node_idx = 0; ; ++node_idx) 1640a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1641a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t err; 1642a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set eclosure_elem; 1643a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node_idx == dfa->nodes_len) 1644a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1645a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!incomplete) 1646a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1647a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner incomplete = false; 1648a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node_idx = 0; 1649a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1650a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1651a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef DEBUG 1652a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner assert (dfa->eclosures[node_idx].nelem != REG_MISSING); 1653a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 1654a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1655a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If we have already calculated, skip it. */ 1656a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->eclosures[node_idx].nelem != 0) 1657a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner continue; 1658a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Calculate epsilon closure of `node_idx'. 
*/ 1659a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true); 1660a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 1661a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1662a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1663a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->eclosures[node_idx].nelem == 0) 1664a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1665a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner incomplete = true; 1666a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_free (&eclosure_elem); 1667a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1668a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1669a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 1670a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1671a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1672a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Calculate epsilon closure of NODE. 
*/ 1673a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1674a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 1675a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnercalc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) 1676a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1677a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t err; 1678a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx i; 1679a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool incomplete; 1680a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool ok; 1681a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set eclosure; 1682a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner incomplete = false; 1683a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); 1684a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 1685a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1686a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1687a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* This indicates that we are calculating this node now. 1688a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner We reference this value to avoid infinite loop. */ 1689a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->eclosures[node].nelem = REG_MISSING; 1690a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1691a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If the current node has constraints, duplicate all nodes 1692a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner since they must inherit the constraints. 
*/ 1693a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->nodes[node].constraint 1694a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && dfa->edests[node].nelem 1695a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) 1696a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1697a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = duplicate_node_closure (dfa, node, node, node, 1698a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->nodes[node].constraint); 1699a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 1700a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1701a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1702a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1703a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Expand each epsilon destination nodes. */ 1704a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (IS_EPSILON_NODE(dfa->nodes[node].type)) 1705a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i < dfa->edests[node].nelem; ++i) 1706a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1707a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set eclosure_elem; 1708a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx edest = dfa->edests[node].elems[i]; 1709a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If calculating the epsilon closure of `edest' is in progress, 1710a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return intermediate result. 
*/ 1711a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->eclosures[edest].nelem == REG_MISSING) 1712a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1713a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner incomplete = true; 1714a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner continue; 1715a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1716a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If we haven't calculated the epsilon closure of `edest' yet, 1717a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner calculate now. Otherwise use calculated epsilon closure. */ 1718a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->eclosures[edest].nelem == 0) 1719a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1720a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false); 1721a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (err != REG_NOERROR, 0)) 1722a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return err; 1723a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1724a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 1725a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner eclosure_elem = dfa->eclosures[edest]; 1726a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Merge the epsilon closure of `edest'. */ 1727a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_merge (&eclosure, &eclosure_elem); 1728a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If the epsilon closure of `edest' is incomplete, 1729a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner the epsilon closure of this node is also incomplete. 
*/ 1730a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->eclosures[edest].nelem == 0) 1731a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1732a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner incomplete = true; 1733a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_node_set_free (&eclosure_elem); 1734a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1735a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1736a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1737a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Epsilon closures include itself. */ 1738a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ok = re_node_set_insert (&eclosure, node); 1739a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (! ok, 0)) 1740a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 1741a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (incomplete && !root) 1742a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->eclosures[node].nelem = 0; 1743a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 1744a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->eclosures[node] = eclosure; 1745a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *new_set = eclosure; 1746a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 1747a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1748a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1749a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Functions for token which are used in the parser. */ 1750a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1751a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Fetch a token from INPUT. 1752a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner We must not use this function inside bracket expressions. 
*/ 1753a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1754a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void 1755a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerinternal_function 1756a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerfetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) 1757a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1758a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (input, peek_token (result, input, syntax)); 1759a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1760a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1761a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Peek a token from INPUT, and return the length of the token. 1762a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner We must not use this function inside bracket expressions. */ 1763a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1764a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic int 1765a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerinternal_function 1766a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerpeek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) 1767a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 1768a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char c; 1769a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1770a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (re_string_eoi (input)) 1771a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1772a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = END_OF_RE; 1773a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 0; 1774a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1775a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
1776a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner c = re_string_peek_byte (input, 0); 1777a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.c = c; 1778a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1779a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->word_char = 0; 1780a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 1781a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->mb_partial = 0; 1782a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (input->mb_cur_max > 1 && 1783a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner !re_string_first_byte (input, re_string_cur_idx (input))) 1784a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1785a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 1786a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->mb_partial = 1; 1787a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 1; 1788a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1789a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 1790a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (c == '\\') 1791a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1792a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char c2; 1793a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (re_string_cur_idx (input) + 1 >= re_string_length (input)) 1794a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1795a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = BACK_SLASH; 1796a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 1; 1797a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1798a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1799a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner c2 = 
re_string_peek_byte_case (input, 1); 1800a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.c = c2; 1801a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 1802a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 1803a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (input->mb_cur_max > 1) 1804a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1805a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wint_t wc = re_string_wchar_at (input, 1806a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_cur_idx (input) + 1); 1807a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; 1808a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1809a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 1810a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 1811a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->word_char = IS_WORD_CHAR (c2) != 0; 1812a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1813a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (c2) 1814a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1815a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '|': 1816a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) 1817a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_ALT; 1818a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1819a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '1': case '2': case '3': case '4': case '5': 1820a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '6': case '7': case '8': case '9': 1821a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_BK_REFS)) 
1822a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1823a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_BACK_REF; 1824a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.idx = c2 - '1'; 1825a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1826a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1827a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '<': 1828a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_GNU_OPS)) 1829a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1830a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = ANCHOR; 1831a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = WORD_FIRST; 1832a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1833a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1834a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '>': 1835a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_GNU_OPS)) 1836a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1837a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = ANCHOR; 1838a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = WORD_LAST; 1839a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1840a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1841a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case 'b': 1842a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_GNU_OPS)) 1843a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1844a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = ANCHOR; 1845a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = WORD_DELIM; 1846a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' 
Turner } 1847a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1848a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case 'B': 1849a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_GNU_OPS)) 1850a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1851a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = ANCHOR; 1852a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = NOT_WORD_DELIM; 1853a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1854a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1855a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case 'w': 1856a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_GNU_OPS)) 1857a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_WORD; 1858a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1859a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case 'W': 1860a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_GNU_OPS)) 1861a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_NOTWORD; 1862a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1863a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case 's': 1864a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_GNU_OPS)) 1865a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_SPACE; 1866a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1867a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case 'S': 1868a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_GNU_OPS)) 1869a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_NOTSPACE; 1870a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 
1871a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '`': 1872a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_GNU_OPS)) 1873a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1874a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = ANCHOR; 1875a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = BUF_FIRST; 1876a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1877a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1878a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '\'': 1879a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_GNU_OPS)) 1880a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1881a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = ANCHOR; 1882a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = BUF_LAST; 1883a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1884a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1885a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '(': 1886a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_BK_PARENS)) 1887a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_OPEN_SUBEXP; 1888a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1889a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case ')': 1890a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NO_BK_PARENS)) 1891a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_CLOSE_SUBEXP; 1892a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1893a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '+': 1894a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_LIMITED_OPS) && (syntax & 
RE_BK_PLUS_QM)) 1895a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_DUP_PLUS; 1896a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1897a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '?': 1898a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) 1899a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_DUP_QUESTION; 1900a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1901a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '{': 1902a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) 1903a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_OPEN_DUP_NUM; 1904a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1905a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '}': 1906a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) 1907a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_CLOSE_DUP_NUM; 1908a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1909a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 1910a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1911a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1912a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 2; 1913a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1914a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1915a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 1916a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 1917a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (input->mb_cur_max > 1) 
1918a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1919a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); 1920a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; 1921a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1922a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 1923a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 1924a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->word_char = IS_WORD_CHAR (token->opr.c); 1925a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 1926a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (c) 1927a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1928a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '\n': 1929a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (syntax & RE_NEWLINE_ALT) 1930a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_ALT; 1931a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1932a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '|': 1933a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) 1934a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_ALT; 1935a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1936a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '*': 1937a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_DUP_ASTERISK; 1938a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1939a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '+': 1940a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) 
1941a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_DUP_PLUS; 1942a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1943a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '?': 1944a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) 1945a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_DUP_QUESTION; 1946a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1947a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '{': 1948a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) 1949a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_OPEN_DUP_NUM; 1950a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1951a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '}': 1952a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) 1953a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_CLOSE_DUP_NUM; 1954a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1955a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '(': 1956a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (syntax & RE_NO_BK_PARENS) 1957a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_OPEN_SUBEXP; 1958a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1959a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case ')': 1960a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (syntax & RE_NO_BK_PARENS) 1961a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_CLOSE_SUBEXP; 1962a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1963a6dfe5f70959a596290e1591579d26a288a1a2f9David 
'Digit' Turner case '[': 1964a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_OPEN_BRACKET; 1965a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1966a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '.': 1967a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_PERIOD; 1968a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1969a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '^': 1970a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && 1971a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_cur_idx (input) != 0) 1972a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1973a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner char prev = re_string_peek_byte (input, -1); 1974a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') 1975a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1976a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1977a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = ANCHOR; 1978a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = LINE_FIRST; 1979a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1980a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '$': 1981a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && 1982a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_cur_idx (input) + 1 != re_string_length (input)) 1983a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 1984a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t next; 1985a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (input, 1); 
1986a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner peek_token (&next, input, syntax); 1987a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (input, -1); 1988a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) 1989a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1990a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1991a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = ANCHOR; 1992a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = LINE_LAST; 1993a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1994a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 1995a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 1996a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 1997a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 1; 1998a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 1999a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2000a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Peek a token from INPUT, and return the length of the token. 2001a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner We must not use this function out of bracket expressions. 
*/ 2002a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2003a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic int 2004a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerinternal_function 2005a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerpeek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) 2006a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 2007a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char c; 2008a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (re_string_eoi (input)) 2009a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2010a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = END_OF_RE; 2011a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 0; 2012a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2013a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner c = re_string_peek_byte (input, 0); 2014a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.c = c; 2015a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2016a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 2017a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (input->mb_cur_max > 1 && 2018a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner !re_string_first_byte (input, re_string_cur_idx (input))) 2019a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2020a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 2021a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 1; 2022a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2023a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 2024a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2025a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (c == '\\' && 
(syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) 2026a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && re_string_cur_idx (input) + 1 < re_string_length (input)) 2027a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2028a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* In this case, '\' escape a character. */ 2029a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char c2; 2030a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (input, 1); 2031a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner c2 = re_string_peek_byte (input, 0); 2032a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.c = c2; 2033a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 2034a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 1; 2035a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2036a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (c == '[') /* '[' is a special char in a bracket exps. 
*/ 2037a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2038a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char c2; 2039a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int token_len; 2040a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (re_string_cur_idx (input) + 1 < re_string_length (input)) 2041a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner c2 = re_string_peek_byte (input, 1); 2042a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2043a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner c2 = 0; 2044a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.c = c2; 2045a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token_len = 2; 2046a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (c2) 2047a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2048a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '.': 2049a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_OPEN_COLL_ELEM; 2050a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2051a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '=': 2052a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_OPEN_EQUIV_CLASS; 2053a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2054a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case ':': 2055a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (syntax & RE_CHAR_CLASSES) 2056a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2057a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_OPEN_CHAR_CLASS; 2058a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2059a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2060a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* else fall through. 
*/ 2061a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 2062a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 2063a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.c = c; 2064a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token_len = 1; 2065a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2066a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2067a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return token_len; 2068a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2069a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (c) 2070a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2071a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '-': 2072a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_CHARSET_RANGE; 2073a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2074a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case ']': 2075a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_CLOSE_BRACKET; 2076a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2077a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case '^': 2078a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = OP_NON_MATCH_LIST; 2079a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2080a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 2081a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 2082a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2083a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 1; 2084a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 2085a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2086a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* 
Functions for parser.  */

/* Entry point of the parser.
   Parse the regular expression REGEXP and return the structure tree.
   If an error occurs, ERR is set to the error code and NULL is returned.
   This function builds the following tree, from regular expression <reg_exp>:
           CAT
           / \
          /   \
   <reg_exp>  EOR

   CAT means concatenation.
   EOR means end of regular expression.
*/ 2099a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2100a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 2101a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerparse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, 2102a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t *err) 2103a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 2104a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) preg->buffer; 2105a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *tree, *eor, *root; 2106a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t current_token; 2107a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->syntax = syntax; 2108a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); 2109a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); 2110a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2111a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2112a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner eor = create_tree (dfa, NULL, NULL, END_OF_RE); 2113a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (tree != NULL) 2114a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner root = create_tree (dfa, tree, eor, CONCAT); 2115a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2116a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner root = eor; 2117a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (eor == NULL || root == NULL, 0)) 2118a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2119a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 
2120a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2121a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2122a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return root; 2123a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 2124a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2125a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* This function build the following tree, from regular expression 2126a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner <branch1>|<branch2>: 2127a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ALT 2128a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner / \ 2129a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner / \ 2130a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner <branch1> <branch2> 2131a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2132a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ALT means alternative, which represents the operator `|'. 
*/ 2133a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2134a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 2135a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerparse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, 2136a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax, Idx nest, reg_errcode_t *err) 2137a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 2138a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) preg->buffer; 2139a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *tree, *branch = NULL; 2140a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = parse_branch (regexp, preg, token, syntax, nest, err); 2141a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2142a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2143a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2144a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (token->type == OP_ALT) 2145a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2146a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); 2147a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type != OP_ALT && token->type != END_OF_RE 2148a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) 2149a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2150a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner branch = parse_branch (regexp, preg, token, syntax, nest, err); 2151a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR && branch == NULL, 0)) 2152a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 
2153a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2154a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2155a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner branch = NULL; 2156a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, tree, branch, OP_ALT); 2157a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL, 0)) 2158a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2159a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2160a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2161a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2162a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2163a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 2164a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 2165a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2166a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* This function build the following tree, from regular expression 2167a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner <exp1><exp2>: 2168a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner CAT 2169a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner / \ 2170a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner / \ 2171a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner <exp1> <exp2> 2172a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2173a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner CAT means concatenation. 
*/ 2174a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2175a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 2176a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerparse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, 2177a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax, Idx nest, reg_errcode_t *err) 2178a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 2179a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *tree, *expr; 2180a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) preg->buffer; 2181a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = parse_expression (regexp, preg, token, syntax, nest, err); 2182a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2183a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2184a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2185a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (token->type != OP_ALT && token->type != END_OF_RE 2186a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) 2187a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2188a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner expr = parse_expression (regexp, preg, token, syntax, nest, err); 2189a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR && expr == NULL, 0)) 2190a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2191a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2192a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2193a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (tree != NULL && expr != NULL) 2194a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 
2195a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, tree, expr, CONCAT); 2196a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (tree == NULL) 2197a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2198a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2199a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2200a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2201a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2202a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (tree == NULL) 2203a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = expr; 2204a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Otherwise expr == NULL, we don't need to create new tree. */ 2205a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2206a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 2207a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 2208a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2209a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* This function build the following tree, from regular expression a*: 2210a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner * 2211a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner | 2212a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner a 2213a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner*/ 2214a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2215a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 2216a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerparse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, 2217a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax, Idx nest, reg_errcode_t *err) 2218a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 
2219a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) preg->buffer; 2220a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *tree; 2221a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (token->type) 2222a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2223a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case CHARACTER: 2224a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_token_tree (dfa, NULL, NULL, token); 2225a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL, 0)) 2226a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2227a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2228a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2229a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2230a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 2231a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->mb_cur_max > 1) 2232a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2233a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (!re_string_eoi (regexp) 2234a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) 2235a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2236a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *mbc_remain; 2237a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fetch_token (token, regexp, syntax); 2238a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbc_remain = create_token_tree (dfa, NULL, NULL, token); 2239a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, tree, mbc_remain, CONCAT); 2240a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (mbc_remain == NULL || tree == 
NULL, 0)) 2241a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2242a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2243a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2244a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2245a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2246a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2247a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 2248a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2249a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_OPEN_SUBEXP: 2250a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); 2251a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2252a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2253a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2254a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_OPEN_BRACKET: 2255a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = parse_bracket_exp (regexp, dfa, token, syntax, err); 2256a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2257a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2258a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2259a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_BACK_REF: 2260a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) 2261a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2262a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESUBREG; 2263a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 
2264a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2265a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->used_bkref_map |= 1 << token->opr.idx; 2266a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_token_tree (dfa, NULL, NULL, token); 2267a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL, 0)) 2268a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2269a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2270a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2271a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2272a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ++dfa->nbackref; 2273a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->has_mb_node = 1; 2274a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2275a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_OPEN_DUP_NUM: 2276a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (syntax & RE_CONTEXT_INVALID_DUP) 2277a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2278a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_BADRPT; 2279a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2280a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2281a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* FALLTHROUGH */ 2282a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_DUP_ASTERISK: 2283a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_DUP_PLUS: 2284a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_DUP_QUESTION: 2285a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (syntax & RE_CONTEXT_INVALID_OPS) 2286a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2287a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_BADRPT; 
2288a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2289a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2290a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (syntax & RE_CONTEXT_INDEP_OPS) 2291a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2292a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fetch_token (token, regexp, syntax); 2293a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return parse_expression (regexp, preg, token, syntax, nest, err); 2294a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2295a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* else fall through */ 2296a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_CLOSE_SUBEXP: 2297a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((token->type == OP_CLOSE_SUBEXP) && 2298a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) 2299a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2300a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ERPAREN; 2301a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2302a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2303a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* else fall through */ 2304a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_CLOSE_DUP_NUM: 2305a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We treat it as a normal character. */ 2306a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2307a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Then we can these characters as normal characters. 
*/ 2308a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 2309a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* mb_partial and word_char bits should be initialized already 2310a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner by peek_token. */ 2311a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_token_tree (dfa, NULL, NULL, token); 2312a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL, 0)) 2313a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2314a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2315a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2316a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2317a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2318a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case ANCHOR: 2319a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((token->opr.ctx_type 2320a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) 2321a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && dfa->word_ops_used == 0) 2322a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner init_word_char (dfa); 2323a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->opr.ctx_type == WORD_DELIM 2324a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || token->opr.ctx_type == NOT_WORD_DELIM) 2325a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2326a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *tree_first, *tree_last; 2327a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->opr.ctx_type == WORD_DELIM) 2328a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2329a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = WORD_FIRST; 
2330a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree_first = create_token_tree (dfa, NULL, NULL, token); 2331a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = WORD_LAST; 2332a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2333a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2334a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2335a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = INSIDE_WORD; 2336a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree_first = create_token_tree (dfa, NULL, NULL, token); 2337a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->opr.ctx_type = INSIDE_NOTWORD; 2338a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2339a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree_last = create_token_tree (dfa, NULL, NULL, token); 2340a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, tree_first, tree_last, OP_ALT); 2341a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) 2342a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2343a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2344a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2345a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2346a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2347a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2348a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2349a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_token_tree (dfa, NULL, NULL, token); 2350a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL, 0)) 2351a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 
2352a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2353a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2354a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2355a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2356a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We must return here, since ANCHORs can't be followed 2357a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner by repetition operators. 2358a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>", 2359a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner it must not be "<ANCHOR(^)><REPEAT(*)>". */ 2360a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fetch_token (token, regexp, syntax); 2361a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 2362a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_PERIOD: 2363a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_token_tree (dfa, NULL, NULL, token); 2364a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL, 0)) 2365a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2366a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2367a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2368a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2369a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->mb_cur_max > 1) 2370a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->has_mb_node = 1; 2371a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2372a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_WORD: 2373a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_NOTWORD: 2374a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = build_charclass_op 
(dfa, regexp->trans, 2375a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (const unsigned char *) "alnum", 2376a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (const unsigned char *) "_", 2377a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type == OP_NOTWORD, err); 2378a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2379a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2380a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2381a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_SPACE: 2382a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_NOTSPACE: 2383a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = build_charclass_op (dfa, regexp->trans, 2384a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (const unsigned char *) "space", 2385a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (const unsigned char *) "", 2386a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type == OP_NOTSPACE, err); 2387a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2388a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2389a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2390a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_ALT: 2391a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case END_OF_RE: 2392a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2393a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case BACK_SLASH: 2394a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_EESCAPE; 2395a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2396a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 
2397a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Must not happen? */ 2398a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef DEBUG 2399a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner assert (0); 2400a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 2401a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2402a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2403a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fetch_token (token, regexp, syntax); 2404a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2405a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS 2406a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) 2407a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2408a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); 2409a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR && tree == NULL, 0)) 2410a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2411a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* In BRE consecutive duplications are not allowed. 
*/ 2412a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((syntax & RE_CONTEXT_INVALID_DUP) 2413a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && (token->type == OP_DUP_ASTERISK 2414a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || token->type == OP_OPEN_DUP_NUM)) 2415a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2416a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_BADRPT; 2417a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2418a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2419a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2420a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2421a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 2422a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 2423a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2424a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* This function build the following tree, from regular expression 2425a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (<reg_exp>): 2426a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner SUBEXP 2427a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner | 2428a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner <reg_exp> 2429a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner*/ 2430a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2431a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 2432a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerparse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, 2433a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax, Idx nest, reg_errcode_t *err) 2434a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 2435a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_dfa_t *dfa = (re_dfa_t *) 
preg->buffer; 2436a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *tree; 2437a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t cur_nsub; 2438a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner cur_nsub = preg->re_nsub++; 2439a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2440a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); 2441a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2442a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* The subexpression may be a null string. */ 2443a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == OP_CLOSE_SUBEXP) 2444a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = NULL; 2445a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2446a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2447a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); 2448a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) 2449a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_EPAREN; 2450a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR, 0)) 2451a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2452a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2453a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2454a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (cur_nsub <= '9' - '1') 2455a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->completed_bkref_map |= 1 << cur_nsub; 2456a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2457a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, tree, NULL, SUBEXP); 
2458a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL, 0)) 2459a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2460a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2461a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2462a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2463a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree->token.opr.idx = cur_nsub; 2464a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 2465a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 2466a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2467a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ 2468a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2469a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 2470a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerparse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, 2471a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) 2472a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 2473a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *tree = NULL, *old_tree = NULL; 2474a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx i, start, end, start_idx = re_string_cur_idx (regexp); 2475a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t start_token = *token; 2476a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2477a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == OP_OPEN_DUP_NUM) 2478a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2479a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner end = 0; 2480a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' 
Turner start = fetch_number (regexp, token, syntax); 2481a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (start == REG_MISSING) 2482a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2483a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == CHARACTER && token->opr.c == ',') 2484a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner start = 0; /* We treat "{,m}" as "{0,m}". */ 2485a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2486a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2487a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_BADBR; /* <re>{} is invalid. */ 2488a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2489a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2490a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2491a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (start != REG_ERROR, 1)) 2492a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2493a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We treat "{n}" as "{n,n}". */ 2494a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner end = ((token->type == OP_CLOSE_DUP_NUM) ? start 2495a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : ((token->type == CHARACTER && token->opr.c == ',') 2496a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ? fetch_number (regexp, token, syntax) : REG_ERROR)); 2497a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2498a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (start == REG_ERROR || end == REG_ERROR, 0)) 2499a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2500a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Invalid sequence. 
*/ 2501a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) 2502a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2503a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == END_OF_RE) 2504a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_EBRACE; 2505a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2506a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_BADBR; 2507a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2508a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2509a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2510a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2511a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If the syntax bit is set, rollback. */ 2512a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_set_index (regexp, start_idx); 2513a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *token = start_token; 2514a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 2515a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* mb_partial and word_char bits should be already initialized by 2516a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner peek_token. */ 2517a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return elem; 2518a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2519a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2520a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (end != REG_MISSING && start > end, 0)) 2521a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2522a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* First number greater than second. 
*/ 2523a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_BADBR; 2524a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2525a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2526a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2527a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2528a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2529a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner start = (token->type == OP_DUP_PLUS) ? 1 : 0; 2530a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING; 2531a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2532a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2533a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fetch_token (token, regexp, syntax); 2534a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2535a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (elem == NULL, 0)) 2536a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2537a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (start == 0 && end == 0, 0)) 2538a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2539a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner postorder (elem, free_tree, NULL); 2540a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2541a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2542a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2543a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". 
*/ 2544a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (start > 0, 0)) 2545a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2546a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = elem; 2547a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 2; i <= start; ++i) 2548a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2549a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem = duplicate_tree (elem, dfa); 2550a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, tree, elem, CONCAT); 2551a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (elem == NULL || tree == NULL, 0)) 2552a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_dup_op_espace; 2553a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2554a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2555a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (start == end) 2556a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 2557a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2558a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Duplicate ELEM before it is marked optional. 
*/ 2559a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem = duplicate_tree (elem, dfa); 2560a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner old_tree = tree; 2561a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2562a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2563a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner old_tree = NULL; 2564a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2565a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (elem->token.type == SUBEXP) 2566d0b797fbf45f3cec923b22f93b61e86a1f867844Andrew Hsieh postorder (elem, mark_opt_subexp, (void *) (intptr_t) elem->token.opr.idx); 2567a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2568a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, elem, NULL, 2569a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT)); 2570a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL, 0)) 2571a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_dup_op_espace; 2572a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2573a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* This loop is actually executed only when end != REG_MISSING, 2574a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have 2575a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner already created the start+1-th copy. 
*/ 2576a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((Idx) -1 < 0 || end != REG_MISSING) 2577a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = start + 2; i <= end; ++i) 2578a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2579a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem = duplicate_tree (elem, dfa); 2580a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, tree, elem, CONCAT); 2581a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (elem == NULL || tree == NULL, 0)) 2582a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_dup_op_espace; 2583a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2584a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, tree, NULL, OP_ALT); 2585a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL, 0)) 2586a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_dup_op_espace; 2587a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2588a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2589a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (old_tree) 2590a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, old_tree, tree, CONCAT); 2591a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2592a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 2593a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2594a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner parse_dup_op_espace: 2595a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 2596a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 2597a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 2598a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
2599a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Size of the names for collating symbol/equivalence_class/character_class. 2600a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner I'm not sure, but maybe enough. */ 2601a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define BRACKET_NAME_BUF_SIZE 32 2602a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2603a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifndef _LIBC 2604a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Local function for parse_bracket_exp only used in case of NOT _LIBC. 2605a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Build the range expression which starts from START_ELEM, and ends 2606a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner at END_ELEM. The result are written to MBCSET and SBCSET. 2607a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner RANGE_ALLOC is the allocated size of mbcset->range_starts, and 2608a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->range_ends, is a pointer argument sinse we may 2609a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner update it. 
*/ 2610a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2611a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 2612a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerinternal_function 2613a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# ifdef RE_ENABLE_I18N 2614a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerbuild_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc, 2615a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bracket_elem_t *start_elem, bracket_elem_t *end_elem) 2616a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# else /* not RE_ENABLE_I18N */ 2617a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerbuild_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, 2618a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bracket_elem_t *end_elem) 2619a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# endif /* not RE_ENABLE_I18N */ 2620a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 2621a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned int start_ch, end_ch; 2622a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Equivalence Classes and Character Classes can't be a range start/end. */ 2623a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS 2624a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, 2625a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 0)) 2626a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ERANGE; 2627a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2628a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We can handle no multi character collating elements without libc 2629a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner support. 
*/ 2630a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE ((start_elem->type == COLL_SYM 2631a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && strlen ((char *) start_elem->opr.name) > 1) 2632a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || (end_elem->type == COLL_SYM 2633a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && strlen ((char *) end_elem->opr.name) > 1), 0)) 2634a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ECOLLATE; 2635a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2636a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# ifdef RE_ENABLE_I18N 2637a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2638a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wchar_t wc; 2639a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wint_t start_wc; 2640a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wint_t end_wc; 2641a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; 2642a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2643a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch 2644a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] 2645a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : 0)); 2646a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch 2647a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : ((end_elem->type == COLL_SYM) ? 
end_elem->opr.name[0] 2648a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : 0)); 2649a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) 2650a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ? __btowc (start_ch) : start_elem->opr.wch); 2651a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) 2652a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ? __btowc (end_ch) : end_elem->opr.wch); 2653a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (start_wc == WEOF || end_wc == WEOF) 2654a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ECOLLATE; 2655a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner cmp_buf[0] = start_wc; 2656a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner cmp_buf[4] = end_wc; 2657a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (wcscoll (cmp_buf, cmp_buf + 4) > 0) 2658a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ERANGE; 2659a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2660a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Got valid collation sequence values, add them as a new entry. 2661a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner However, for !_LIBC we have no collation elements: if the 2662a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner character set is single byte, the single byte character set 2663a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner that we build below suffices. parse_bracket_exp passes 2664a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner no MBCSET if dfa->mb_cur_max == 1. 
*/ 2665a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (mbcset) 2666a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2667a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Check the space of the arrays. */ 2668a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*range_alloc == mbcset->nranges, 0)) 2669a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2670a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* There is not enough space, need realloc. */ 2671a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wchar_t *new_array_start, *new_array_end; 2672a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx new_nranges; 2673a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2674a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* +1 in case of mbcset->nranges is 0. */ 2675a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_nranges = 2 * mbcset->nranges + 1; 2676a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Use realloc since mbcset->range_starts and mbcset->range_ends 2677a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner are NULL if *range_alloc == 0. 
*/ 2678a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_array_start = re_realloc (mbcset->range_starts, wchar_t, 2679a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_nranges); 2680a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_array_end = re_realloc (mbcset->range_ends, wchar_t, 2681a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_nranges); 2682a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2683a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (new_array_start == NULL || new_array_end == NULL, 0)) 2684a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 2685a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2686a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->range_starts = new_array_start; 2687a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->range_ends = new_array_end; 2688a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *range_alloc = new_nranges; 2689a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2690a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2691a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->range_starts[mbcset->nranges] = start_wc; 2692a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->range_ends[mbcset->nranges++] = end_wc; 2693a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2694a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2695a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Build the table for single byte characters. 
*/ 2696a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (wc = 0; wc < SBC_MAX; ++wc) 2697a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2698a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner cmp_buf[2] = wc; 2699a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 2700a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) 2701a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, wc); 2702a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2703a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2704a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# else /* not RE_ENABLE_I18N */ 2705a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2706a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned int ch; 2707a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch 2708a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] 2709a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : 0)); 2710a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch 2711a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] 2712a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : 0)); 2713a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (start_ch > end_ch) 2714a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ERANGE; 2715a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Build the table for single byte characters. 
*/ 2716a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (ch = 0; ch < SBC_MAX; ++ch) 2717a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (start_ch <= ch && ch <= end_ch) 2718a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, ch); 2719a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2720a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# endif /* not RE_ENABLE_I18N */ 2721a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 2722a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 2723a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not _LIBC */ 2724a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2725a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifndef _LIBC 2726a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. 2727a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Build the collating element which is represented by NAME. 2728a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner The result are written to MBCSET and SBCSET. 2729a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a 2730a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner pointer argument since we may update it. 
*/ 2731a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2732a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 2733a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerinternal_function 2734a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerbuild_collating_symbol (bitset_t sbcset, 2735a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# ifdef RE_ENABLE_I18N 2736a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_charset_t *mbcset, Idx *coll_sym_alloc, 2737a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# endif 2738a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *name) 2739a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 2740a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t name_len = strlen ((const char *) name); 2741a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (name_len != 1, 0)) 2742a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ECOLLATE; 2743a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2744a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2745a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, name[0]); 2746a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 2747a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2748a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 2749a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not _LIBC */ 2750a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2751a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* This function parse bracket expression like "[abc]", "[a-c]", 2752a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner "[[.a-a.]]" etc. 
*/ 2753a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2754a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 2755a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerparse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, 2756a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax, reg_errcode_t *err) 2757a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 2758a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 2759a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *collseqmb; 2760a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const char *collseqwc; 2761a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner uint32_t nrules; 2762a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int32_t table_size; 2763a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const int32_t *symb_table; 2764a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *extra; 2765a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2766a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Local function for parse_bracket_exp used in _LIBC environement. 2767a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Seek the collating symbol entry correspondings to NAME. 2768a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Return the index of the symbol in the SYMB_TABLE. 
*/ 2769a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2770a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner auto inline int32_t 2771a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner __attribute ((always_inline)) 2772a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner seek_collating_symbol_entry (name, name_len) 2773a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *name; 2774a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t name_len; 2775a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2776a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int32_t hash = elem_hash ((const char *) name, name_len); 2777a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int32_t elem = hash % table_size; 2778a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (symb_table[2 * elem] != 0) 2779a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2780a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int32_t second = hash % (table_size - 2) + 1; 2781a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2782a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner do 2783a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2784a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* First compare the hashing value. */ 2785a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (symb_table[2 * elem] == hash 2786a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Compare the length of the name. */ 2787a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && name_len == extra[symb_table[2 * elem + 1]] 2788a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Compare the name. 
*/ 2789a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], 2790a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner name_len) == 0) 2791a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2792a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Yep, this is the entry. */ 2793a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 2794a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2795a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2796a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Next entry. */ 2797a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem += second; 2798a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2799a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (symb_table[2 * elem] != 0); 2800a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2801a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return elem; 2802a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2803a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2804a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Local function for parse_bracket_exp used in _LIBC environement. 2805a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Look up the collation sequence value of BR_ELEM. 2806a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Return the value if succeeded, UINT_MAX otherwise. 
*/ 2807a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2808a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner auto inline unsigned int 2809a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner __attribute ((always_inline)) 2810a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner lookup_collation_sequence_value (br_elem) 2811a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bracket_elem_t *br_elem; 2812a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2813a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (br_elem->type == SB_CHAR) 2814a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2815a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* 2816a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (MB_CUR_MAX == 1) 2817a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner */ 2818a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (nrules == 0) 2819a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return collseqmb[br_elem->opr.ch]; 2820a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2821a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2822a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wint_t wc = __btowc (br_elem->opr.ch); 2823a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return __collseq_table_lookup (collseqwc, wc); 2824a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2825a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2826a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (br_elem->type == MB_CHAR) 2827a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2828a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return __collseq_table_lookup (collseqwc, br_elem->opr.wch); 2829a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2830a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 
if (br_elem->type == COLL_SYM) 2831a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2832a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t sym_name_len = strlen ((char *) br_elem->opr.name); 2833a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (nrules != 0) 2834a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2835a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int32_t elem, idx; 2836a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem = seek_collating_symbol_entry (br_elem->opr.name, 2837a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner sym_name_len); 2838a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (symb_table[2 * elem] != 0) 2839a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2840a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We found the entry. */ 2841a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx = symb_table[2 * elem + 1]; 2842a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Skip the name of collating element name. */ 2843a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx += 1 + extra[idx]; 2844a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Skip the byte sequence of the collating element. */ 2845a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx += 1 + extra[idx]; 2846a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Adjust for the alignment. */ 2847a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx = (idx + 3) & ~3; 2848a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Skip the multibyte collation sequence value. */ 2849a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx += sizeof (unsigned int); 2850a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Skip the wide char sequence of the collating element. 
*/ 2851a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx += sizeof (unsigned int) * 2852a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (1 + *(unsigned int *) (extra + idx)); 2853a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Return the collation sequence value. */ 2854a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return *(unsigned int *) (extra + idx); 2855a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2856a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (symb_table[2 * elem] == 0 && sym_name_len == 1) 2857a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2858a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* No valid character. Match it as a single byte 2859a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner character. */ 2860a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return collseqmb[br_elem->opr.name[0]]; 2861a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2862a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2863a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (sym_name_len == 1) 2864a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return collseqmb[br_elem->opr.name[0]]; 2865a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2866a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return UINT_MAX; 2867a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2868a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2869a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Local function for parse_bracket_exp used in _LIBC environement. 2870a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Build the range expression which starts from START_ELEM, and ends 2871a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner at END_ELEM. The result are written to MBCSET and SBCSET. 
2872a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner RANGE_ALLOC is the allocated size of mbcset->range_starts, and 2873a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->range_ends, is a pointer argument sinse we may 2874a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner update it. */ 2875a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2876a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner auto inline reg_errcode_t 2877a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner __attribute ((always_inline)) 2878a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) 2879a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_charset_t *mbcset; 2880a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx *range_alloc; 2881a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_t sbcset; 2882a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bracket_elem_t *start_elem, *end_elem; 2883a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2884a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned int ch; 2885a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner uint32_t start_collseq; 2886a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner uint32_t end_collseq; 2887a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2888a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Equivalence Classes and Character Classes can't be a range 2889a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner start/end. 
*/ 2890a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS 2891a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, 2892a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 0)) 2893a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ERANGE; 2894a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2895a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner start_collseq = lookup_collation_sequence_value (start_elem); 2896a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner end_collseq = lookup_collation_sequence_value (end_elem); 2897a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Check start/end collation sequence values. */ 2898a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) 2899a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ECOLLATE; 2900a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) 2901a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ERANGE; 2902a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2903a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Got valid collation sequence values, add them as a new entry. 2904a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner However, if we have no collation elements, and the character set 2905a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner is single byte, the single byte character set that we 2906a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner build below suffices. 
*/ 2907a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (nrules > 0 || dfa->mb_cur_max > 1) 2908a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2909a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Check the space of the arrays. */ 2910a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*range_alloc == mbcset->nranges, 0)) 2911a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2912a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* There is not enough space, need realloc. */ 2913a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner uint32_t *new_array_start; 2914a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner uint32_t *new_array_end; 2915a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx new_nranges; 2916a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2917a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* +1 in case of mbcset->nranges is 0. */ 2918a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_nranges = 2 * mbcset->nranges + 1; 2919a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_array_start = re_realloc (mbcset->range_starts, uint32_t, 2920a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_nranges); 2921a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_array_end = re_realloc (mbcset->range_ends, uint32_t, 2922a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_nranges); 2923a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2924a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (new_array_start == NULL || new_array_end == NULL, 0)) 2925a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 2926a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2927a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->range_starts = new_array_start; 
2928a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->range_ends = new_array_end; 2929a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *range_alloc = new_nranges; 2930a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2931a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2932a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->range_starts[mbcset->nranges] = start_collseq; 2933a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->range_ends[mbcset->nranges++] = end_collseq; 2934a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2935a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2936a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Build the table for single byte characters. */ 2937a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (ch = 0; ch < SBC_MAX; ch++) 2938a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2939a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner uint32_t ch_collseq; 2940a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* 2941a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (MB_CUR_MAX == 1) 2942a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner */ 2943a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (nrules == 0) 2944a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ch_collseq = collseqmb[ch]; 2945a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2946a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); 2947a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) 2948a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, ch); 2949a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2950a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' 
Turner return REG_NOERROR; 2951a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2952a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2953a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Local function for parse_bracket_exp used in _LIBC environement. 2954a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Build the collating element which is represented by NAME. 2955a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner The result are written to MBCSET and SBCSET. 2956a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a 2957a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner pointer argument sinse we may update it. */ 2958a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2959a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner auto inline reg_errcode_t 2960a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner __attribute ((always_inline)) 2961a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) 2962a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_charset_t *mbcset; 2963a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx *coll_sym_alloc; 2964a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_t sbcset; 2965a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *name; 2966a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2967a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int32_t elem, idx; 2968a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t name_len = strlen ((const char *) name); 2969a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (nrules != 0) 2970a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2971a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem = seek_collating_symbol_entry 
(name, name_len); 2972a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (symb_table[2 * elem] != 0) 2973a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2974a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We found the entry. */ 2975a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx = symb_table[2 * elem + 1]; 2976a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Skip the name of collating element name. */ 2977a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx += 1 + extra[idx]; 2978a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2979a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (symb_table[2 * elem] == 0 && name_len == 1) 2980a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2981a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* No valid character, treat it as a normal 2982a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner character. */ 2983a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, name[0]); 2984a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 2985a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 2986a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 2987a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ECOLLATE; 2988a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 2989a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Got valid collation sequence, add it as a new entry. */ 2990a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Check the space of the arrays. */ 2991a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) 2992a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 2993a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Not enough, realloc it. 
*/ 2994a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* +1 in case of mbcset->ncoll_syms is 0. */ 2995a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; 2996a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Use realloc since mbcset->coll_syms is NULL 2997a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if *alloc == 0. */ 2998a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, 2999a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_coll_sym_alloc); 3000a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (new_coll_syms == NULL, 0)) 3001a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 3002a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->coll_syms = new_coll_syms; 3003a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *coll_sym_alloc = new_coll_sym_alloc; 3004a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3005a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->coll_syms[mbcset->ncoll_syms++] = idx; 3006a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 3007a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3008a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3009a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3010a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (name_len != 1, 0)) 3011a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ECOLLATE; 3012a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3013a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3014a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, name[0]); 3015a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 
3016a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3017a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3018a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3019a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 3020a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3021a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t br_token; 3022a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_bitset_ptr_t sbcset; 3023a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3024a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_charset_t *mbcset; 3025a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; 3026a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx equiv_class_alloc = 0, char_class_alloc = 0; 3027a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not RE_ENABLE_I18N */ 3028a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool non_match = false; 3029a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *work_tree; 3030a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int token_len; 3031a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool first_round = true; 3032a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 3033a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner collseqmb = (const unsigned char *) 3034a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); 3035a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3036a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (nrules) 3037a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3038a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* 
3039a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (MB_CUR_MAX > 1) 3040a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner */ 3041a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); 3042a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); 3043a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, 3044a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner _NL_COLLATE_SYMB_TABLEMB); 3045a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, 3046a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner _NL_COLLATE_SYMB_EXTRAMB); 3047a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3048a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 3049a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 3050a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3051a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); 3052a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3053a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3054a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (sbcset == NULL || mbcset == NULL, 0)) 3055a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else 3056a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (sbcset == NULL, 0)) 3057a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3058a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3059a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err 
= REG_ESPACE; 3060a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 3061a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3062a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3063a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token_len = peek_token_bracket (token, regexp, syntax); 3064a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (token->type == END_OF_RE, 0)) 3065a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3066a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_BADPAT; 3067a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3068a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3069a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == OP_NON_MATCH_LIST) 3070a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3071a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3072a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->non_match = 1; 3073a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not RE_ENABLE_I18N */ 3074a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner non_match = true; 3075a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (syntax & RE_HAT_LISTS_NOT_NEWLINE) 3076a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, '\n'); 3077a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (regexp, token_len); /* Skip a token. 
*/ 3078a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token_len = peek_token_bracket (token, regexp, syntax); 3079a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (token->type == END_OF_RE, 0)) 3080a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3081a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_BADPAT; 3082a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3083a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3084a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3085a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3086a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We treat the first ']' as a normal character. */ 3087a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == OP_CLOSE_BRACKET) 3088a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 3089a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3090a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (1) 3091a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3092a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bracket_elem_t start_elem, end_elem; 3093a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; 3094a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; 3095a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t ret; 3096a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int token_len2 = 0; 3097a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bool is_range_exp = false; 3098a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t token2; 3099a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3100a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' 
Turner start_elem.opr.name = start_name_buf; 3101a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, 3102a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner syntax, first_round); 3103a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (ret != REG_NOERROR, 0)) 3104a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3105a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = ret; 3106a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3107a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3108a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner first_round = false; 3109a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3110a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Get information about the next token. We need it in any case. */ 3111a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token_len = peek_token_bracket (token, regexp, syntax); 3112a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3113a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Do not check for ranges if we know they are not allowed. 
*/ 3114a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) 3115a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3116a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (token->type == END_OF_RE, 0)) 3117a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3118a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_EBRACK; 3119a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3120a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3121a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == OP_CHARSET_RANGE) 3122a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3123a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ 3124a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token_len2 = peek_token_bracket (&token2, regexp, syntax); 3125a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (token2.type == END_OF_RE, 0)) 3126a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3127a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_EBRACK; 3128a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3129a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3130a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token2.type == OP_CLOSE_BRACKET) 3131a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3132a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We treat the last '-' as a normal character. 
*/ 3133a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (regexp, -token_len); 3134a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token->type = CHARACTER; 3135a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3136a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3137a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner is_range_exp = true; 3138a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3139a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3140a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3141a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (is_range_exp == true) 3142a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3143a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner end_elem.opr.name = end_name_buf; 3144a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, 3145a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa, syntax, true); 3146a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (ret != REG_NOERROR, 0)) 3147a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3148a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = ret; 3149a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3150a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3151a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3152a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner token_len = peek_token_bracket (token, regexp, syntax); 3153a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3154a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 3155a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = build_range_exp (sbcset, mbcset, &range_alloc, 
3156a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner &start_elem, &end_elem); 3157a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else 3158a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# ifdef RE_ENABLE_I18N 3159a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = build_range_exp (sbcset, 3160a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->mb_cur_max > 1 ? mbcset : NULL, 3161a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner &range_alloc, &start_elem, &end_elem); 3162a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# else 3163a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = build_range_exp (sbcset, &start_elem, &end_elem); 3164a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# endif 3165a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3166a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR, 0)) 3167a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3168a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3169a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3170a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3171a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (start_elem.type) 3172a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3173a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case SB_CHAR: 3174a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, start_elem.opr.ch); 3175a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3176a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3177a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case MB_CHAR: 3178a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Check whether the array has enough space. 
*/ 3179a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (mbchar_alloc == mbcset->nmbchars, 0)) 3180a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3181a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wchar_t *new_mbchars; 3182a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Not enough, realloc it. */ 3183a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* +1 in case of mbcset->nmbchars is 0. */ 3184a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbchar_alloc = 2 * mbcset->nmbchars + 1; 3185a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Use realloc since array is NULL if *alloc == 0. */ 3186a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_mbchars = re_realloc (mbcset->mbchars, wchar_t, 3187a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbchar_alloc); 3188a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (new_mbchars == NULL, 0)) 3189a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_espace; 3190a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->mbchars = new_mbchars; 3191a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3192a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; 3193a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3194a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3195a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case EQUIV_CLASS: 3196a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = build_equiv_class (sbcset, 3197a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3198a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset, &equiv_class_alloc, 3199a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 
3200a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner start_elem.opr.name); 3201a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR, 0)) 3202a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3203a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3204a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case COLL_SYM: 3205a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = build_collating_symbol (sbcset, 3206a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3207a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset, &coll_sym_alloc, 3208a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3209a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner start_elem.opr.name); 3210a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR, 0)) 3211a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3212a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3213a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case CHAR_CLASS: 3214a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = build_charclass (regexp->trans, sbcset, 3215a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3216a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset, &char_class_alloc, 3217a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3218a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner start_elem.opr.name, syntax); 3219a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*err != REG_NOERROR, 0)) 3220a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3221a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 
3222a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 3223a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner assert (0); 3224a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3225a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3226a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3227a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (token->type == END_OF_RE, 0)) 3228a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3229a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_EBRACK; 3230a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_free_return; 3231a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3232a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == OP_CLOSE_BRACKET) 3233a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3234a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3235a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3236a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (regexp, token_len); /* Skip a token. */ 3237a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3238a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If it is non-matching list. */ 3239a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (non_match) 3240a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_not (sbcset); 3241a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3242a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3243a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Ensure only single byte characters are set. 
*/ 3244a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->mb_cur_max > 1) 3245a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_mask (sbcset, dfa->sb_char); 3246a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3247a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes 3248a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes 3249a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || mbcset->non_match))) 3250a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3251a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *mbc_tree; 3252a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int sbc_idx; 3253a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Build a tree for complex bracket. */ 3254a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->has_mb_node = 1; 3255a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner br_token.type = COMPLEX_BRACKET; 3256a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner br_token.opr.mbcset = mbcset; 3257a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3258a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (mbc_tree == NULL, 0)) 3259a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_espace; 3260a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) 3261a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (sbcset[sbc_idx]) 3262a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3263a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If there are no bits set in sbcset, there is no point 3264a6dfe5f70959a596290e1591579d26a288a1a2f9David 
'Digit' Turner of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ 3265a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (sbc_idx < BITSET_WORDS) 3266a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3267a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Build a tree for simple bracket. */ 3268a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner br_token.type = SIMPLE_BRACKET; 3269a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner br_token.opr.sbcset = sbcset; 3270a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner work_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3271a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (work_tree == NULL, 0)) 3272a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_espace; 3273a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3274a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Then join them by ALT node. 
*/ 3275a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); 3276a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (work_tree == NULL, 0)) 3277a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_espace; 3278a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3279a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3280a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3281a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (sbcset); 3282a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner work_tree = mbc_tree; 3283a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3284a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3285a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3286a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not RE_ENABLE_I18N */ 3287a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3288a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3289a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_charset (mbcset); 3290a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 3291a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Build a tree for simple bracket. 
*/ 3292a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner br_token.type = SIMPLE_BRACKET; 3293a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner br_token.opr.sbcset = sbcset; 3294a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner work_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3295a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (work_tree == NULL, 0)) 3296a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto parse_bracket_exp_espace; 3297a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3298a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return work_tree; 3299a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3300a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner parse_bracket_exp_espace: 3301a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 3302a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner parse_bracket_exp_free_return: 3303a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (sbcset); 3304a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3305a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_charset (mbcset); 3306a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3307a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 3308a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3309a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3310a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Parse an element in the bracket expression. 
*/ 3311a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3312a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 3313a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerparse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, 3314a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *token, int token_len, re_dfa_t *dfa, 3315a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_syntax_t syntax, bool accept_hyphen) 3316a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3317a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3318a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int cur_char_size; 3319a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); 3320a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (cur_char_size > 1) 3321a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3322a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem->type = MB_CHAR; 3323a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); 3324a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (regexp, cur_char_size); 3325a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 3326a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3327a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3328a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (regexp, token_len); /* Skip a token. 
*/ 3329a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS 3330a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || token->type == OP_OPEN_EQUIV_CLASS) 3331a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return parse_bracket_symbol (elem, regexp, token); 3332a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) 3333a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3334a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* A '-' must only appear as anything but a range indicator before 3335a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner the closing bracket. Everything else is an error. */ 3336a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t token2; 3337a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (void) peek_token_bracket (&token2, regexp, syntax); 3338a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token2.type != OP_CLOSE_BRACKET) 3339a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* The actual error value is not standardized since this whole 3340a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case is undefined. But ERANGE makes good sense. 
*/ 3341a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ERANGE; 3342a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3343a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem->type = SB_CHAR; 3344a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem->opr.ch = token->opr.c; 3345a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 3346a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3347a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3348a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Parse a bracket symbol in the bracket expression. Bracket symbols are 3349a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner such as [:<character_class>:], [.<collating_element>.], and 3350a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner [=<equivalent_class>=]. */ 3351a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3352a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 3353a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerparse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, 3354a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t *token) 3355a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3356a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char ch, delim = token->opr.c; 3357a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int i = 0; 3358a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (re_string_eoi(regexp)) 3359a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_EBRACK; 3360a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (;; ++i) 3361a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3362a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (i >= BRACKET_NAME_BUF_SIZE) 3363a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
return REG_EBRACK; 3364a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == OP_OPEN_CHAR_CLASS) 3365a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ch = re_string_fetch_byte_case (regexp); 3366a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3367a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ch = re_string_fetch_byte (regexp); 3368a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (re_string_eoi(regexp)) 3369a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_EBRACK; 3370a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (ch == delim && re_string_peek_byte (regexp, 0) == ']') 3371a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3372a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem->opr.name[i] = ch; 3373a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3374a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_string_skip_bytes (regexp, 1); 3375a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem->opr.name[i] = '\0'; 3376a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner switch (token->type) 3377a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3378a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_OPEN_COLL_ELEM: 3379a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem->type = COLL_SYM; 3380a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3381a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_OPEN_EQUIV_CLASS: 3382a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem->type = EQUIV_CLASS; 3383a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3384a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner case OP_OPEN_CHAR_CLASS: 3385a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner elem->type = CHAR_CLASS; 
3386a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3387a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner default: 3388a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3389a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3390a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 3391a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3392a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3393a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Helper function for parse_bracket_exp. 3394a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Build the equivalence class which is represented by NAME. 3395a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner The result are written to MBCSET and SBCSET. 3396a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, 3397a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner is a pointer argument sinse we may update it. 
*/ 3398a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3399a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 3400a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3401a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerbuild_equiv_class (bitset_t sbcset, re_charset_t *mbcset, 3402a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx *equiv_class_alloc, const unsigned char *name) 3403a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else /* not RE_ENABLE_I18N */ 3404a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerbuild_equiv_class (bitset_t sbcset, const unsigned char *name) 3405a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not RE_ENABLE_I18N */ 3406a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3407a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef _LIBC 3408a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3409a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (nrules != 0) 3410a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3411a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const int32_t *table, *indirect; 3412a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *weights, *extra, *cp; 3413a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char char_buf[2]; 3414a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int32_t idx1, idx2; 3415a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned int ch; 3416a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner size_t len; 3417a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* This #include defines a local function! 
*/ 3418a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# include <locale/weight.h> 3419a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Calculate the index for equivalence class. */ 3420a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner cp = name; 3421a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 3422a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, 3423a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner _NL_COLLATE_WEIGHTMB); 3424a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, 3425a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner _NL_COLLATE_EXTRAMB); 3426a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, 3427a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner _NL_COLLATE_INDIRECTMB); 3428a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx1 = findidx (&cp); 3429a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) 3430a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* This isn't a valid character. */ 3431a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ECOLLATE; 3432a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3433a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Build single byte matcing table for this equivalence class. 
*/ 3434a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner char_buf[1] = (unsigned char) '\0'; 3435a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner len = weights[idx1]; 3436a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (ch = 0; ch < SBC_MAX; ++ch) 3437a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3438a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner char_buf[0] = ch; 3439a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner cp = char_buf; 3440a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx2 = findidx (&cp); 3441a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* 3442a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner idx2 = table[ch]; 3443a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner*/ 3444a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (idx2 == 0) 3445a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* This isn't a valid character. */ 3446a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner continue; 3447a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (len == weights[idx2]) 3448a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3449a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int cnt = 0; 3450a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (cnt <= len && 3451a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) 3452a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ++cnt; 3453a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3454a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (cnt > len) 3455a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, ch); 3456a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3457a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 
3458a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Check whether the array has enough space. */ 3459a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) 3460a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3461a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Not enough, realloc it. */ 3462a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* +1 in case of mbcset->nequiv_classes is 0. */ 3463a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; 3464a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Use realloc since the array is NULL if *alloc == 0. */ 3465a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, 3466a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int32_t, 3467a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_equiv_class_alloc); 3468a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (new_equiv_classes == NULL, 0)) 3469a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 3470a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->equiv_classes = new_equiv_classes; 3471a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *equiv_class_alloc = new_equiv_class_alloc; 3472a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3473a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; 3474a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3475a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3476a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* _LIBC */ 3477a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3478a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if 
(BE (strlen ((const char *) name) != 1, 0)) 3479a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ECOLLATE; 3480a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, *name); 3481a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3482a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 3483a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3484a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3485a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Helper function for parse_bracket_exp. 3486a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Build the character class which is represented by NAME. 3487a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner The result are written to MBCSET and SBCSET. 3488a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, 3489a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner is a pointer argument sinse we may update it. 
*/ 3490a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3491a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 3492a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3493a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerbuild_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, 3494a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_charset_t *mbcset, Idx *char_class_alloc, 3495a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *class_name, reg_syntax_t syntax) 3496a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else /* not RE_ENABLE_I18N */ 3497a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerbuild_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, 3498a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *class_name, reg_syntax_t syntax) 3499a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not RE_ENABLE_I18N */ 3500a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3501a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner int i; 3502a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const char *name = (const char *) class_name; 3503a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3504a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* In case of REG_ICASE "upper" and "lower" match the both of 3505a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner upper and lower cases. 
*/ 3506a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if ((syntax & RE_ICASE) 3507a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) 3508a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner name = "alpha"; 3509a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3510a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3511a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Check the space of the arrays. */ 3512a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) 3513a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3514a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Not enough, realloc it. */ 3515a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* +1 in case of mbcset->nchar_classes is 0. */ 3516a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1; 3517a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Use realloc since array is NULL if *alloc == 0. 
*/ 3518a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, 3519a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner new_char_class_alloc); 3520a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (new_char_classes == NULL, 0)) 3521a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ESPACE; 3522a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->char_classes = new_char_classes; 3523a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *char_class_alloc = new_char_class_alloc; 3524a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3525a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); 3526a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3527a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3528a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#define BUILD_CHARCLASS_LOOP(ctype_func) \ 3529a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner do { \ 3530a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (trans != NULL, 0)) \ 3531a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { \ 3532a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i < SBC_MAX; ++i) \ 3533a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (ctype_func (i)) \ 3534a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, trans[i]); \ 3535a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } \ 3536a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else \ 3537a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { \ 3538a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (i = 0; i < SBC_MAX; ++i) \ 3539a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (ctype_func (i)) 
\ 3540a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, i); \ 3541a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } \ 3542a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } while (0) 3543a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3544a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (strcmp (name, "alnum") == 0) 3545a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (isalnum); 3546a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "cntrl") == 0) 3547a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (iscntrl); 3548a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "lower") == 0) 3549a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (islower); 3550a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "space") == 0) 3551a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (isspace); 3552a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "alpha") == 0) 3553a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (isalpha); 3554a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "digit") == 0) 3555a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (isdigit); 3556a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "print") == 0) 3557a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (isprint); 3558a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "upper") == 0) 3559a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (isupper); 3560a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "blank") == 
0) 3561a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (isblank); 3562a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "graph") == 0) 3563a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (isgraph); 3564a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "punct") == 0) 3565a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (ispunct); 3566a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else if (strcmp (name, "xdigit") == 0) 3567a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner BUILD_CHARCLASS_LOOP (isxdigit); 3568a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3569a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ECTYPE; 3570a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3571a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 3572a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3573a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3574a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 3575a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerbuild_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, 3576a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *class_name, 3577a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const unsigned char *extra, bool non_match, 3578a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t *err) 3579a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3580a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_bitset_ptr_t sbcset; 3581a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3582a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_charset_t *mbcset; 
3583a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx alloc = 0; 3584a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not RE_ENABLE_I18N */ 3585a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner reg_errcode_t ret; 3586a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t br_token; 3587a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *tree; 3588a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3589a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 3590a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3591a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); 3592a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3593a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3594a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3595a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (sbcset == NULL || mbcset == NULL, 0)) 3596a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else /* not RE_ENABLE_I18N */ 3597a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (sbcset == NULL, 0)) 3598a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not RE_ENABLE_I18N */ 3599a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3600a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 3601a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 3602a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3603a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3604a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (non_match) 3605a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 
3606a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3607a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset->non_match = 1; 3608a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not RE_ENABLE_I18N */ 3609a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3610a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3611a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* We don't care the syntax in this case. */ 3612a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ret = build_charclass (trans, sbcset, 3613a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3614a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbcset, &alloc, 3615a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3616a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner class_name, 0); 3617a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3618a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (ret != REG_NOERROR, 0)) 3619a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3620a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (sbcset); 3621a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3622a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_charset (mbcset); 3623a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3624a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = ret; 3625a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 3626a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3627a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* \w match '_' also. 
*/ 3628a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (; *extra; extra++) 3629a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_set (sbcset, *extra); 3630a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3631a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* If it is non-matching list. */ 3632a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (non_match) 3633a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_not (sbcset); 3634a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3635a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3636a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Ensure only single byte characters are set. */ 3637a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->mb_cur_max > 1) 3638a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bitset_mask (sbcset, dfa->sb_char); 3639a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif 3640a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3641a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Build a tree for simple bracket. 
*/ 3642a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner br_token.type = SIMPLE_BRACKET; 3643a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner br_token.opr.sbcset = sbcset; 3644a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_token_tree (dfa, NULL, NULL, &br_token); 3645a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (tree == NULL, 0)) 3646a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto build_word_op_espace; 3647a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3648a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3649a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (dfa->mb_cur_max > 1) 3650a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3651a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *mbc_tree; 3652a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Build a tree for complex bracket. */ 3653a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner br_token.type = COMPLEX_BRACKET; 3654a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner br_token.opr.mbcset = mbcset; 3655a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->has_mb_node = 1; 3656a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3657a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (mbc_tree == NULL, 0)) 3658a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner goto build_word_op_espace; 3659a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Then join them by ALT node. 
*/ 3660a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = create_tree (dfa, tree, mbc_tree, OP_ALT); 3661a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (mbc_tree != NULL, 1)) 3662a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 3663a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3664a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3665a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3666a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_charset (mbcset); 3667a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 3668a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3669a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#else /* not RE_ENABLE_I18N */ 3670a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 3671a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* not RE_ENABLE_I18N */ 3672a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3673a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner build_word_op_espace: 3674a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (sbcset); 3675a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3676a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_charset (mbcset); 3677a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3678a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *err = REG_ESPACE; 3679a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 3680a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3681a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3682a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* This is intended for the expressions like "a{1,3}". 
3683a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Fetch a number from `input', and return the number. 3684a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Return REG_MISSING if the number field is empty like "{,1}". 3685a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Return REG_ERROR if an error occurred. */ 3686a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3687a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic Idx 3688a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerfetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) 3689a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3690a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner Idx num = REG_MISSING; 3691a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner unsigned char c; 3692a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (1) 3693a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3694a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner fetch_token (token, input, syntax); 3695a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner c = token->opr.c; 3696a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (token->type == END_OF_RE, 0)) 3697a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_ERROR; 3698a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (token->type == OP_CLOSE_DUP_NUM || c == ',') 3699a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner break; 3700a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner num = ((token->type != CHARACTER || c < '0' || '9' < c 3701a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner || num == REG_ERROR) 3702a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner ? REG_ERROR 3703a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner : ((num == REG_MISSING) ? 
c - '0' : num * 10 + c - '0')); 3704a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner num = (num > RE_DUP_MAX) ? REG_ERROR : num; 3705a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3706a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return num; 3707a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3708a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3709a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3710a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void 3711a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerfree_charset (re_charset_t *cset) 3712a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3713a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (cset->mbchars); 3714a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# ifdef _LIBC 3715a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (cset->coll_syms); 3716a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (cset->equiv_classes); 3717a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (cset->range_starts); 3718a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (cset->range_ends); 3719a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner# endif 3720a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (cset->char_classes); 3721a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (cset); 3722a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3723a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3724a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3725a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Functions for binary tree operation. 
*/ 3726a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3727a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Create a tree node. */ 3728a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3729a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 3730a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnercreate_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, 3731a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_type_t type) 3732a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3733a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_token_t t; 3734a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner t.type = type; 3735a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return create_token_tree (dfa, left, right, &t); 3736a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3737a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3738a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 3739a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnercreate_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, 3740a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const re_token_t *token) 3741a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3742a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *tree; 3743a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) 3744a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3745a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); 3746a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3747a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (storage == NULL) 3748a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return 
NULL; 3749a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner storage->next = dfa->str_tree_storage; 3750a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->str_tree_storage = storage; 3751a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dfa->str_tree_storage_idx = 0; 3752a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3753a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++]; 3754a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3755a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree->parent = NULL; 3756a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree->left = left; 3757a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree->right = right; 3758a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree->token = *token; 3759a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree->token.duplicated = 0; 3760a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree->token.opt_subexp = 0; 3761a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree->first = NULL; 3762a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree->next = NULL; 3763a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner tree->node_idx = REG_MISSING; 3764a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3765a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (left != NULL) 3766a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner left->parent = tree; 3767a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (right != NULL) 3768a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner right->parent = tree; 3769a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return tree; 3770a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3771a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 
3772a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Mark the tree SRC as an optional subexpression. 3773a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner To be called from preorder or postorder. */ 3774a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3775a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 3776a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnermark_opt_subexp (void *extra, bin_tree_t *node) 3777a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3778d0b797fbf45f3cec923b22f93b61e86a1f867844Andrew Hsieh Idx idx = (Idx) (intptr_t) extra; 3779d0b797fbf45f3cec923b22f93b61e86a1f867844Andrew Hsieh assert(sizeof(void*) >= sizeof(Idx)); 3780a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->token.type == SUBEXP && node->token.opr.idx == idx) 3781a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node->token.opt_subexp = 1; 3782a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3783a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 3784a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3785a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3786a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Free the allocated memory inside NODE. 
*/ 3787a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3788a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic void 3789a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerfree_token (re_token_t *node) 3790a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3791a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#ifdef RE_ENABLE_I18N 3792a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->type == COMPLEX_BRACKET && node->duplicated == 0) 3793a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_charset (node->opr.mbcset); 3794a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3795a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner#endif /* RE_ENABLE_I18N */ 3796a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->type == SIMPLE_BRACKET && node->duplicated == 0) 3797a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner re_free (node->opr.sbcset); 3798a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3799a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3800a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Worker function for tree walking. Free the allocated memory inside NODE 3801a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner and its children. 
*/ 3802a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3803a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic reg_errcode_t 3804a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerfree_tree (void *extra, bin_tree_t *node) 3805a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3806a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner free_token (&node->token); 3807a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return REG_NOERROR; 3808a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3809a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3810a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3811a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner/* Duplicate the node SRC, and return new node. This is a preorder 3812a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner visit similar to the one implemented by the generic visitor, but 3813a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner we need more infrastructure to maintain two parallel trees --- so, 3814a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner it's easier to duplicate. 
*/ 3815a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3816a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerstatic bin_tree_t * 3817a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turnerduplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) 3818a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner{ 3819a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const bin_tree_t *node; 3820a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t *dup_root; 3821a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner bin_tree_t **p_new = &dup_root, *dup_node = root->parent; 3822a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3823a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner for (node = root; ; ) 3824a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3825a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Create a new tree and link it back to the current parent. */ 3826a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner *p_new = create_token_tree (dfa, NULL, NULL, &node->token); 3827a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (*p_new == NULL) 3828a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return NULL; 3829a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (*p_new)->parent = dup_node; 3830a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner (*p_new)->token.duplicated = 1; 3831a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dup_node = *p_new; 3832a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner 3833a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner /* Go to the left node, or up and to the right. 
*/ 3834a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (node->left) 3835a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3836a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node = node->left; 3837a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner p_new = &dup_node->left; 3838a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3839a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner else 3840a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3841a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner const bin_tree_t *prev = NULL; 3842a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner while (node->right == prev || node->right == NULL) 3843a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner { 3844a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner prev = node; 3845a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node = node->parent; 3846a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner dup_node = dup_node->parent; 3847a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner if (!node) 3848a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner return dup_root; 3849a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3850a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner node = node->right; 3851a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner p_new = &dup_node->right; 3852a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3853a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner } 3854a6dfe5f70959a596290e1591579d26a288a1a2f9David 'Digit' Turner} 3855