/**
 * @file op_regex.h
 * This file contains various definitions and the interface for a
 * lightweight wrapper around libc regex, providing match
 * and replace facilities.
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 * @remark Idea comes from TextFilt project <http://textfilt.sourceforge.net>
 *
 * @author Philippe Elie
 */
138cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd
148cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#ifndef OP_REGEX_H
158cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#define OP_REGEX_H
168cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd
178cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd// required by posix before including regex.h
188cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include <sys/types.h>
198cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include <regex.h>
208cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd
218cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include <string>
228cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include <vector>
238cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include <map>
248cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd
258cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#include "op_exception.h"
268cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd
278cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd/**
288cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * ill formed regular expression or expression throw such exception
298cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd */
308cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Doddstruct bad_regex : op_exception {
318cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd	bad_regex(std::string const & pattern);
328cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd};
338cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd
/**
 * Thin encapsulation of the regex library providing search and replace.
 *
 * The syntax in use is described, with examples, in stl.pat.
 */
class regular_expression_replace {
public:
	/**
	 * Build an object holding regular definitions and regular
	 * expression / replacement pairs, preparing it for sed-like
	 * substitution.
	 *
	 * @param limit  limit on the number of search and replace done
	 * @param limit_defs_expansion  limit on the number of expansions
	 *  done while replacing regular definition names by their expansion
	 */
	regular_expression_replace(size_t limit = 100,
				   size_t limit_defs_expansion = 100);
	~regular_expression_replace();

	/**
	 * Register a regular definition.
	 *
	 * @param name  a regular definition name
	 * @param replace  the string to substitute in other regular
	 *  definitions or regular expressions when this regular definition
	 *  name is encountered
	 */
	void add_definition(const std::string & name,
			    const std::string & replace);

	/**
	 * Register a regular expression and its replacement.
	 *
	 * Regular definitions can be embedded in pattern but not in replace.
	 *
	 * @param pattern  a regular expression pattern, POSIX extended
	 *  notation
	 * @param replace  the replace string to use when this regular
	 *  expression is matched
	 */
	void add_pattern(const std::string & pattern,
			 const std::string & replace);

	/**
	 * Run the loop, at most limit times, over the set of regular
	 * expressions.
	 *
	 * Returns true if too many matches occurred and replacing has been
	 * stopped because limit_defs_expansion was reached.
	 * NOTE(review): the constructor documents limit, not
	 * limit_defs_expansion, as the bound on search and replace; confirm
	 * against the implementation which limit is meant here.
	 *
	 * To test whether some pattern has been matched, save the input
	 * string and compare it to the new value. There is no way to detect
	 * s/a/a because the output string will be identical to the input
	 * string.
	 *
	 * @param str  the input/output string in which patterns are searched
	 *  and replaced
	 */
	bool execute(std::string & str) const;

private:
	/// dictionary of regular definitions
	typedef std::map<std::string, std::string> defs_dict;

	/// one compiled regular expression together with its replacement
	struct replace_t {
		/// when this regexp is matched ...
		regex_t regexp;
		/// ... the matched part is replaced with this string
		std::string replace;
	};

	// Do not increase this too much: it has a direct impact on
	// performance. It limits the number of grouping expressions allowed
	// in a regular expression. Note that a grouping match operator > 9
	// can be used only in the replace rule, not in the match regular
	// expression, since POSIX does not allow more than \9 in a matching
	// sequence.
	static const size_t max_match = 16;

	// helpers to execute()
	bool do_execute(std::string & text, replace_t const & rule) const;
	void do_replace(std::string & text, const std::string & replacement,
			regmatch_t const * matches) const;

	// helper to add_definition() and add_pattern()
	std::string expand_string(const std::string & source);

	// helper to add_pattern()
	std::string substitute_definition(const std::string & source);

	// return the match selected by idx, or throw if idx is invalid
	regmatch_t const & get_match(regmatch_t const * matches,
				     char idx) const;

	size_t limit;
	size_t limit_defs_expansion;
	std::vector<replace_t> regex_replace;
	defs_dict defs;
};
1208cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd
1218cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd/**
1228cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param regex the regular_expression_replace to fill
1238cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * @param filename the filename from where the deifnition and pattern are read
1248cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd *
1258cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd * add to regex pattern and regular definition read from the given file
1268cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd */
1278cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Doddvoid setup_regex(regular_expression_replace& regex,
1288cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd		 std::string const & filename);
1298cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd
1308cfa702f803c5ef6a2b062a489a1b2cf66b45b5eMike Dodd#endif /* !OP_REGEX_H */
131