/**
 * @file op_regex.h
 * This file contains various definitions and the interface for a
 * lightweight wrapper around libc regex, providing match
 * and replace facilities.
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 * @remark Idea comes from TextFilt project <http://textfilt.sourceforge.net>
 *
 * @author Philippe Elie
 */
13
14#ifndef OP_REGEX_H
15#define OP_REGEX_H
16
17// required by posix before including regex.h
18#include <sys/types.h>
19#include <regex.h>
20
21#include <string>
22#include <vector>
23#include <map>
24
25#include "op_exception.h"
26
27/**
28 * ill formed regular expression or expression throw such exception
29 */
30struct bad_regex : op_exception {
31	bad_regex(std::string const & pattern);
32};
33
/**
 * Lightweight encapsulation of regex lib search and replace.
 *
 * See stl.pat for further details and examples of the syntax used.
 *
 * Objects of this class cannot be copied: every stored replace_t embeds
 * a raw regex_t and the class declares its own destructor, so a
 * member-wise copy would leave two objects sharing the same compiled
 * expressions.
 */
class regular_expression_replace {
public:
	/**
	 * @param limit limit on the number of search and replace done
	 * @param limit_defs_expansion limit on the number of expansions done
	 *  during replacement of regular definition names by their expansion
	 *
	 * Build an object holding regular definitions and regular expression
	 * & replace, preparing it for substitution a la sed.
	 */
	regular_expression_replace(size_t limit = 100,
				   size_t limit_defs_expansion = 100);
	~regular_expression_replace();

	/**
	 * @param name a regular definition name
	 * @param replace the string to substitute in other regular
	 * definitions or regular expressions when this regular definition
	 * name is encountered.
	 */
	void add_definition(std::string const & name,
			    std::string const & replace);
	/**
	 * @param pattern a regular expression pattern, POSIX extended notation
	 * @param replace the replace string to use when this regular
	 *  expression is matched
	 *
	 * You can embed regular definitions in pattern but not in replace.
	 */
	void add_pattern(std::string const & pattern,
			 std::string const & replace);

	/**
	 * @param str the input/output string in which patterns are searched
	 * and replaced.
	 *
	 * Execute the loop at most limit times on the set of regular
	 * expressions.
	 *
	 * Return true if too many matches occur and replacing has been
	 * stopped due to reaching limit_defs_expansion. You can test whether
	 * some pattern has been matched by saving the input string and
	 * comparing it to the new value. There is no way to detect s/a/a
	 * because the output string will be identical to the input string.
	 *
	 * NOTE(review): the constructor documents limit, not
	 * limit_defs_expansion, as the bound on search and replace; confirm
	 * against the implementation which limit triggers the true return.
	 */
	bool execute(std::string & str) const;
private:
	// Copying is disabled: declared private and intentionally left
	// without a definition (see the class comment).
	regular_expression_replace(regular_expression_replace const &);
	regular_expression_replace & operator=(regular_expression_replace const &);

	struct replace_t {
		// when this regexp is matched
		regex_t regexp;
		// replace the matched part with this string
		std::string replace;
	};

	// helpers for execute()
	bool do_execute(std::string & str, replace_t const & regexp) const;
	void do_replace(std::string & str, std::string const & replace,
			regmatch_t const * match) const;

	// helper for add_definition() and add_pattern()
	std::string expand_string(std::string const & input);

	// helper for add_pattern()
	std::string substitute_definition(std::string const & pattern);

	// return the match, or throw if idx is invalid
	regmatch_t const & get_match(regmatch_t const * match, char idx) const;

	// Don't increase this too much, it has a direct impact on
	// performance. It limits the number of grouping expressions allowed
	// in a regular expression. Note that you can use a grouping match
	// operator > 9 only in the replace rule, not in the matching regular
	// expression, since POSIX doesn't allow more than \9 in a matching
	// sequence.
	static const size_t max_match = 16;

	size_t limit;
	size_t limit_defs_expansion;
	std::vector<replace_t> regex_replace;
	/// dictionary of regular definitions
	typedef std::map<std::string, std::string> defs_dict;
	defs_dict defs;
};
120
121/**
122 * @param regex the regular_expression_replace to fill
123 * @param filename the filename from where the deifnition and pattern are read
124 *
125 * add to regex pattern and regular definition read from the given file
126 */
127void setup_regex(regular_expression_replace& regex,
128		 std::string const & filename);
129
130#endif /* !OP_REGEX_H */
131