//===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a POSIX regular expression matcher.  Both Basic and
// Extended POSIX regular expressions (ERE) are supported.  EREs were extended
// to support backreferences in matches.
// This implementation also supports matching strings with embedded NUL chars.
//
//===----------------------------------------------------------------------===//
16ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
173120c18e08290e80ec4f109678269779cfe9e313Mikhail Glushenkov#ifndef LLVM_SUPPORT_REGEX_H
183120c18e08290e80ec4f109678269779cfe9e313Mikhail Glushenkov#define LLVM_SUPPORT_REGEX_H
193120c18e08290e80ec4f109678269779cfe9e313Mikhail Glushenkov
#include "llvm/Support/Compiler.h"
#include <string>
#include <utility>
22ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
// Forward declaration only: this header refers to llvm_regex exclusively
// through a pointer (Regex::preg), so the complete type is not needed here.
struct llvm_regex;
24528700863adefca8de461ce28a7d903729fb96b4Chris Lattner
25ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinnamespace llvm {
26528700863adefca8de461ce28a7d903729fb96b4Chris Lattner  class StringRef;
27528700863adefca8de461ce28a7d903729fb96b4Chris Lattner  template<typename T> class SmallVectorImpl;
287112c86fc208145334aaa04812c794ce6feef416Mikhail Glushenkov
29ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  class Regex {
30ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  public:
31ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin    enum {
3281f46d9ce1888308b33336f9bea72147430da36bChris Lattner      NoFlags=0,
33ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin      /// Compile for matching that ignores upper/lower case distinctions.
34ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin      IgnoreCase=1,
35ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin      /// Compile for newline-sensitive matching. With this flag '[^' bracket
367112c86fc208145334aaa04812c794ce6feef416Mikhail Glushenkov      /// expressions and '.' never match newline. A ^ anchor matches the
377112c86fc208145334aaa04812c794ce6feef416Mikhail Glushenkov      /// null string after any newline in the string in addition to its normal
387112c86fc208145334aaa04812c794ce6feef416Mikhail Glushenkov      /// function, and the $ anchor matches the null string before any
39ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin      /// newline in the string in addition to its normal function.
406b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky      Newline=2,
416b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky      /// By default, the POSIX extended regular expression (ERE) syntax is
426b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky      /// assumed. Pass this flag to turn on basic regular expressions (BRE)
436b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky      /// instead.
446b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky      BasicRegex=4
45ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin    };
46ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
476b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky    /// Compiles the given regular expression \p Regex.
4838e59891ee4417a9be2f8146ce0ba3269e38ac21Benjamin Kramer    Regex(StringRef Regex, unsigned Flags = NoFlags);
4936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Regex(const Regex &) LLVM_DELETED_FUNCTION;
5036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Regex &operator=(Regex regex) {
5136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      std::swap(preg, regex.preg);
5236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      std::swap(error, regex.error);
5336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return *this;
5436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
5536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Regex(Regex &&regex) {
5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      preg = regex.preg;
5736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      error = regex.error;
58dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      regex.preg = nullptr;
5936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
60ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin    ~Regex();
61ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
62ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin    /// isValid - returns the error encountered during regex compilation, or
63ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin    /// matching, if any.
64ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin    bool isValid(std::string &Error);
65ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
6681f46d9ce1888308b33336f9bea72147430da36bChris Lattner    /// getNumMatches - In a valid regex, return the number of parenthesized
6781f46d9ce1888308b33336f9bea72147430da36bChris Lattner    /// matches it contains.  The number filled in by match will include this
6881f46d9ce1888308b33336f9bea72147430da36bChris Lattner    /// many entries plus one for the whole regex (as element 0).
6981f46d9ce1888308b33336f9bea72147430da36bChris Lattner    unsigned getNumMatches() const;
707112c86fc208145334aaa04812c794ce6feef416Mikhail Glushenkov
71c5252da873d547a19069eaf9030fec203f128f66Dmitri Gribenko    /// matches - Match the regex against a given \p String.
72ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin    ///
737a2bdde0a0eebcd2125055e0eacaca040f0b766cChris Lattner    /// \param Matches - If given, on a successful match this will be filled in
74c5252da873d547a19069eaf9030fec203f128f66Dmitri Gribenko    /// with references to the matched group expressions (inside \p String),
75ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin    /// the first group is always the entire pattern.
76528700863adefca8de461ce28a7d903729fb96b4Chris Lattner    ///
77528700863adefca8de461ce28a7d903729fb96b4Chris Lattner    /// This returns true on a successful match.
78dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr);
79d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar
80d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar    /// sub - Return the result of replacing the first match of the regex in
81c5252da873d547a19069eaf9030fec203f128f66Dmitri Gribenko    /// \p String with the \p Repl string. Backreferences like "\0" in the
82d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar    /// replacement string are replaced with the appropriate match substring.
83d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar    ///
84d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar    /// Note that the replacement string has backslash escaping performed on
85d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar    /// it. Invalid backreferences are ignored (replaced by empty strings).
86d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar    ///
87d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar    /// \param Error If non-null, any errors in the substitution (invalid
88d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar    /// backreferences, trailing backslashes) will be recorded as a non-empty
89d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar    /// string.
90dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    std::string sub(StringRef Repl, StringRef String,
91dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                    std::string *Error = nullptr);
92d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar
93aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne    /// \brief If this function returns true, ^Str$ is an extended regular
94aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne    /// expression that matches Str and only Str.
95aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne    static bool isLiteralERE(StringRef Str);
96aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne
9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    /// \brief Turn String into a regex by escaping its special characters.
9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    static std::string escape(StringRef String);
9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
100ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  private:
101ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin    struct llvm_regex *preg;
102ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin    int error;
103ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  };
104ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin}
1053120c18e08290e80ec4f109678269779cfe9e313Mikhail Glushenkov
1063120c18e08290e80ec4f109678269779cfe9e313Mikhail Glushenkov#endif // LLVM_SUPPORT_REGEX_H
107