1ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin//===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===// 2ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// 3ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// The LLVM Compiler Infrastructure 4ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// 5ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// This file is distributed under the University of Illinois Open Source 6ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// License. See LICENSE.TXT for details. 7ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// 8ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin//===----------------------------------------------------------------------===// 9ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// 106b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky// This file implements a POSIX regular expression matcher. Both Basic and 116b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky// Extended POSIX regular expressions (ERE) are supported. EREs were extended 126b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky// to support backreferences in matches. 136b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky// This implementation also supports matching strings with embedded NUL chars. 14ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// 15ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin//===----------------------------------------------------------------------===// 16ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 173120c18e08290e80ec4f109678269779cfe9e313Mikhail Glushenkov#ifndef LLVM_SUPPORT_REGEX_H 183120c18e08290e80ec4f109678269779cfe9e313Mikhail Glushenkov#define LLVM_SUPPORT_REGEX_H 193120c18e08290e80ec4f109678269779cfe9e313Mikhail Glushenkov 2036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Support/Compiler.h" 21528700863adefca8de461ce28a7d903729fb96b4Chris Lattner#include <string> 22ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 23ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinstruct llvm_regex; 24528700863adefca8de461ce28a7d903729fb96b4Chris Lattner 25ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinnamespace llvm { 26528700863adefca8de461ce28a7d903729fb96b4Chris Lattner class StringRef; 27528700863adefca8de461ce28a7d903729fb96b4Chris Lattner template<typename T> class SmallVectorImpl; 287112c86fc208145334aaa04812c794ce6feef416Mikhail Glushenkov 29ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin class Regex { 30ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin public: 31ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin enum { 3281f46d9ce1888308b33336f9bea72147430da36bChris Lattner NoFlags=0, 33ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin /// Compile for matching that ignores upper/lower case distinctions. 34ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin IgnoreCase=1, 35ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin /// Compile for newline-sensitive matching. With this flag '[^' bracket 367112c86fc208145334aaa04812c794ce6feef416Mikhail Glushenkov /// expressions and '.' never match newline. A ^ anchor matches the 377112c86fc208145334aaa04812c794ce6feef416Mikhail Glushenkov /// null string after any newline in the string in addition to its normal 387112c86fc208145334aaa04812c794ce6feef416Mikhail Glushenkov /// function, and the $ anchor matches the null string before any 39ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin /// newline in the string in addition to its normal function. 406b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky Newline=2, 416b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky /// By default, the POSIX extended regular expression (ERE) syntax is 426b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky /// assumed. Pass this flag to turn on basic regular expressions (BRE) 436b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky /// instead. 446b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky BasicRegex=4 45ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin }; 46ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 476b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky /// Compiles the given regular expression \p Regex. 4838e59891ee4417a9be2f8146ce0ba3269e38ac21Benjamin Kramer Regex(StringRef Regex, unsigned Flags = NoFlags); 4936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Regex(const Regex &) LLVM_DELETED_FUNCTION; 5036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Regex &operator=(Regex regex) { 5136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::swap(preg, regex.preg); 5236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::swap(error, regex.error); 5336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return *this; 5436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 5536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Regex(Regex &®ex) { 5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines preg = regex.preg; 5736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines error = regex.error; 58dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines regex.preg = nullptr; 5936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 60ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin ~Regex(); 61ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 62ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin /// isValid - returns the error encountered during regex compilation, or 63ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin /// matching, if any. 64ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin bool isValid(std::string &Error); 65ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 6681f46d9ce1888308b33336f9bea72147430da36bChris Lattner /// getNumMatches - In a valid regex, return the number of parenthesized 6781f46d9ce1888308b33336f9bea72147430da36bChris Lattner /// matches it contains. The number filled in by match will include this 6881f46d9ce1888308b33336f9bea72147430da36bChris Lattner /// many entries plus one for the whole regex (as element 0). 6981f46d9ce1888308b33336f9bea72147430da36bChris Lattner unsigned getNumMatches() const; 707112c86fc208145334aaa04812c794ce6feef416Mikhail Glushenkov 71c5252da873d547a19069eaf9030fec203f128f66Dmitri Gribenko /// matches - Match the regex against a given \p String. 72ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin /// 737a2bdde0a0eebcd2125055e0eacaca040f0b766cChris Lattner /// \param Matches - If given, on a successful match this will be filled in 74c5252da873d547a19069eaf9030fec203f128f66Dmitri Gribenko /// with references to the matched group expressions (inside \p String), 75ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin /// the first group is always the entire pattern. 76528700863adefca8de461ce28a7d903729fb96b4Chris Lattner /// 77528700863adefca8de461ce28a7d903729fb96b4Chris Lattner /// This returns true on a successful match. 78dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr); 79d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar 80d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar /// sub - Return the result of replacing the first match of the regex in 81c5252da873d547a19069eaf9030fec203f128f66Dmitri Gribenko /// \p String with the \p Repl string. Backreferences like "\0" in the 82d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar /// replacement string are replaced with the appropriate match substring. 83d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar /// 84d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar /// Note that the replacement string has backslash escaping performed on 85d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar /// it. Invalid backreferences are ignored (replaced by empty strings). 86d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar /// 87d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar /// \param Error If non-null, any errors in the substitution (invalid 88d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar /// backreferences, trailing backslashes) will be recorded as a non-empty 89d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar /// string. 90dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines std::string sub(StringRef Repl, StringRef String, 91dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines std::string *Error = nullptr); 92d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar 93aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne /// \brief If this function returns true, ^Str$ is an extended regular 94aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne /// expression that matches Str and only Str. 95aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne static bool isLiteralERE(StringRef Str); 96aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne 9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// \brief Turn String into a regex by escaping its special characters. 9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines static std::string escape(StringRef String); 9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 100ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin private: 101ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin struct llvm_regex *preg; 102ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin int error; 103ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin }; 104ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin} 1053120c18e08290e80ec4f109678269779cfe9e313Mikhail Glushenkov 1063120c18e08290e80ec4f109678269779cfe9e313Mikhail Glushenkov#endif // LLVM_SUPPORT_REGEX_H 107