18cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd/** 28cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @file op_regex.h 38cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * This file contains various definitions and interface for a 48cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * lightweight wrapper around libc regex, providing match 58cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * and replace facility. 68cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * 78cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @remark Copyright 2003 OProfile authors 88cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @remark Read the file COPYING 98cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @remark Idea comes from TextFilt project <http://textfilt.sourceforge.net> 108cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * 118cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @author Philippe Elie 128cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd */ 138cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 148cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#ifndef OP_REGEX_H 158cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#define OP_REGEX_H 168cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 178cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd// required by posix before including regex.h 188cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include <sys/types.h> 198cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include <regex.h> 208cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 218cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include <string> 228cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include <vector> 238cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include <map> 248cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 258cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include "op_exception.h" 268cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 278cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd/** 288cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * ill formed regular expression or expression throw such exception 298cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd */ 308cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Doddstruct bad_regex : op_exception { 318cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd bad_regex(std::string const & pattern); 328cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd}; 338cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 348cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd/** 358cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * lightweight encapsulation of regex lib search and replace 368cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * 378cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * See stl.pat for further details and examples of used syntax. 388cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd */ 398cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Doddclass regular_expression_replace { 408cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Doddpublic: 418cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd /** 428cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param limit limit on number of search and replace done 438cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param limit_defs_expansion limit on number of expansion done 448cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * during replacement of regular definition name by their expansion 458cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * 468cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * build an object holding regular defintion and regular expression 478cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * & replace, preparing it for substitution ala sed 488cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd */ 498cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd regular_expression_replace(size_t limit = 100, 508cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd size_t limit_defs_expansion = 100); 518cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd ~regular_expression_replace(); 528cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 538cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd /** 548cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param name a regular definition name 558cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param replace the string to subsitute in other regular definition 568cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * or regular exepression when this regular defintion name is 578cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * encoutered. 588cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd */ 598cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd void add_definition(std::string const & name, 608cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd std::string const & replace); 618cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd /** 628cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param pattern a regular expression pattern, POSIX extended notation 638cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param replace the replace string to use when this regular 648cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * expression is matched 658cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * 668cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * You can imbed regular definition in pattern but not in replace. 678cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd */ 688cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd void add_pattern(std::string const & pattern, 698cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd std::string const & replace); 708cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 718cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd /** 728cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param str the input/output string where we search pattern and 738cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * replace them. 748cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * 758cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * Execute loop at max limit time on the set of regular expression 768cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * 778cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * Return true if too many match occur and replacing has been stopped 788cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * due to reach limit_defs_expansion. You can test if some pattern has 798cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * been matched by saving the input string and comparing it to the new 808cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * value. There is no way to detect s/a/a because the output string 818cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * will be identical to the input string. 828cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd */ 838cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd bool execute(std::string & str) const; 848cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Doddprivate: 858cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd struct replace_t { 868cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // when this regexp is matched 878cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd regex_t regexp; 888cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // replace the matched part with this string 898cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd std::string replace; 908cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd }; 918cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 928cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // helper to execute 938cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd bool do_execute(std::string & str, replace_t const & regexp) const; 948cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd void do_replace(std::string & str, std::string const & replace, 958cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd regmatch_t const * match) const; 968cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 978cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // helper to add_definition() and add_pattern() 988cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd std::string expand_string(std::string const & input); 998cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 1008cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // helper to add_pattern 1018cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd std::string substitute_definition(std::string const & pattern); 1028cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 1038cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // return the match of throw if idx is invalid 1048cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd regmatch_t const & get_match(regmatch_t const * match, char idx) const; 1058cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 1068cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // don't increase too, it have direct impact on performance. This limit 1078cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // the number of grouping expression allowed in a regular expression 1088cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // Note than you can use grouping match operator > 9 only in the 1098cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // replace rule not in match regular expression since POSIX don't allow 1108cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd // more than \9 in matching sequence. 1118cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd static const size_t max_match = 16; 1128cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 1138cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd size_t limit; 1148cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd size_t limit_defs_expansion; 1158cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd std::vector<replace_t> regex_replace; 1168cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd /// dictionary of regular definition 1178cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd typedef std::map<std::string, std::string> defs_dict; 1188cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd defs_dict defs; 1198cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd}; 1208cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 1218cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd/** 1228cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param regex the regular_expression_replace to fill 1238cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param filename the filename from where the deifnition and pattern are read 1248cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * 1258cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * add to regex pattern and regular definition read from the given file 1268cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd */ 1278cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Doddvoid setup_regex(regular_expression_replace& regex, 1288cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd std::string const & filename); 1298cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd 1308cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#endif /* !OP_REGEX_H */ 131