RegularExpression.cpp revision 73844aa19a7360b662e2be710fc3c969d6c86606
124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//===-- RegularExpression.cpp -----------------------------------*- C++ -*-===//
224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//
324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//                     The LLVM Compiler Infrastructure
424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//
524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// This file is distributed under the University of Illinois Open Source
624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// License. See LICENSE.TXT for details.
724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//
824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//===----------------------------------------------------------------------===//
924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
1024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner#include "lldb/Core/RegularExpression.h"
11a408326b499c3ffdfed2378738598c4ad0cf745fEli Friedman#include <string.h>
1224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
1324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerusing namespace lldb_private;
1424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
1524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//----------------------------------------------------------------------
1624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Default constructor
1724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//----------------------------------------------------------------------
1824943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::RegularExpression() :
1924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    m_re(),
2024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    m_comp_err (1),
2124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    m_preg(),
2203c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    m_compile_flags(REG_EXTENDED),
2324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    m_matches()
2424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{
2524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    memset(&m_preg,0,sizeof(m_preg));
2624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner}
2724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
2824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//----------------------------------------------------------------------
2924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Constructor that compiles "re" using "flags" and stores the
3024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// resulting compiled regular expression into this object.
3124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//----------------------------------------------------------------------
3224943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::RegularExpression(const char* re, int flags) :
3324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    m_re(),
3424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    m_comp_err (1),
3597c8957257a3e0b3ce6f46f8e5a28c965e30f357Daniel Dunbar    m_preg(),
3697c8957257a3e0b3ce6f46f8e5a28c965e30f357Daniel Dunbar    m_compile_flags(flags)
3724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{
3824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    memset(&m_preg,0,sizeof(m_preg));
3903c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    Compile(re);
4024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner}
4124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
//----------------------------------------------------------------------
// Constructor that compiles "re" using the default REG_EXTENDED
// compile flag and stores the resulting compiled regular expression
// into this object.
//----------------------------------------------------------------------
4603c8ee5aeafcd6c43f10002a4f8096af01780f86Jim InghamRegularExpression::RegularExpression(const char* re) :
4703c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    m_re(),
4803c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    m_comp_err (1),
4997c8957257a3e0b3ce6f46f8e5a28c965e30f357Daniel Dunbar    m_preg(),
5097c8957257a3e0b3ce6f46f8e5a28c965e30f357Daniel Dunbar    m_compile_flags(REG_EXTENDED)
5103c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham{
5203c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    memset(&m_preg,0,sizeof(m_preg));
5303c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    Compile(re);
5403c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham}
5503c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham
5603c8ee5aeafcd6c43f10002a4f8096af01780f86Jim InghamRegularExpression::RegularExpression(const RegularExpression &rhs)
5703c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham{
5803c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    memset(&m_preg,0,sizeof(m_preg));
5903c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    Compile(rhs.GetText(), rhs.GetCompileFlags());
6003c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham}
6103c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham
6203c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Inghamconst RegularExpression &
6303c8ee5aeafcd6c43f10002a4f8096af01780f86Jim InghamRegularExpression::operator= (const RegularExpression &rhs)
6403c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham{
6503c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    if (&rhs != this)
6603c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    {
6703c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham        Compile (rhs.GetText(), rhs.GetCompileFlags());
6803c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    }
6903c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    return *this;
7003c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham}
//----------------------------------------------------------------------
// Destructor
//
// Any previously compiled regular expression contained in this
// object will be freed.
//----------------------------------------------------------------------
7724943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::~RegularExpression()
7824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{
7924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    Free();
8024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner}
8124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
//----------------------------------------------------------------------
// Compile a regular expression using the supplied regular
// expression text and flags. The compiled regular expression lives
// in this object so that it can be readily used for regular
// expression matches. Execute() can be called after the regular
// expression is compiled. Any previously compiled regular
// expression contained in this object will be freed.
//
// RETURNS
//  True if the regular expression compiles successfully, false
//  otherwise.
//----------------------------------------------------------------------
9424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerbool
9503c8ee5aeafcd6c43f10002a4f8096af01780f86Jim InghamRegularExpression::Compile(const char* re)
9603c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham{
9703c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    return Compile (re, m_compile_flags);
9803c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham}
9903c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham
10003c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Inghambool
10124943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::Compile(const char* re, int flags)
10224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{
10324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    Free();
10403c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham    m_compile_flags = flags;
10503c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham
10624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    if (re && re[0])
10724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    {
10824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        m_re = re;
10924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        m_comp_err = ::regcomp (&m_preg, re, flags);
11024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    }
11124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    else
11224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    {
11324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        // No valid regular expression
11424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        m_comp_err = 1;
11524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    }
11624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
11724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    return m_comp_err == 0;
11824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner}
11924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
//----------------------------------------------------------------------
// Execute a regular expression match using the compiled regular
// expression that is already in this object against the match
// string "s". If any parens are used for regular expression
// matches, "num_matches" should indicate the number of
// sub-expression matches to record; the results are stored in this
// object and can be retrieved with GetMatchAtIndex(). The regular
// expression will be executed using the "execute_flags".
//----------------------------------------------------------------------
12824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerbool
12924943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::Execute(const char* s, size_t num_matches, int execute_flags) const
13024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{
13124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    int match_result = 1;
13224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    if (m_comp_err == 0)
13324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    {
13424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        if (num_matches > 0)
13524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner            m_matches.resize(num_matches + 1);
13624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        else
13724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner            m_matches.clear();
13824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
13924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        match_result = ::regexec (&m_preg,
14024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner                                  s,
14124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner                                  m_matches.size(),
142928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton                                  &m_matches[0],
14324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner                                  execute_flags);
14424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    }
14524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    return match_result == 0;
14624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner}
14724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
14824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerbool
14924943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::GetMatchAtIndex (const char* s, uint32_t idx, std::string& match_str) const
15024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{
15124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    if (idx <= m_preg.re_nsub && idx < m_matches.size())
15224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    {
153928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton        if (m_matches[idx].rm_eo == m_matches[idx].rm_so)
154928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton        {
155928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton            // Matched the empty string...
156928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton            match_str.clear();
157928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton            return true;
158928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton        }
159928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton        else if (m_matches[idx].rm_eo > m_matches[idx].rm_so)
160928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton        {
161928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton            match_str.assign (s + m_matches[idx].rm_so,
162928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton                              m_matches[idx].rm_eo - m_matches[idx].rm_so);
163928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton            return true;
164928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton        }
16524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    }
16624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    return false;
16724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner}
16824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
16924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
17024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//----------------------------------------------------------------------
17124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Returns true if the regular expression compiled and is ready
17224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// for execution.
17324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//----------------------------------------------------------------------
17424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerbool
17524943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::IsValid () const
17624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{
17724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    return m_comp_err == 0;
17824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner}
17924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
18024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//----------------------------------------------------------------------
18124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Returns the text that was used to compile the current regular
18224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// expression.
18324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//----------------------------------------------------------------------
18424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerconst char*
18524943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::GetText () const
18624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{
18773844aa19a7360b662e2be710fc3c969d6c86606Greg Clayton    if (m_re.empty())
18873844aa19a7360b662e2be710fc3c969d6c86606Greg Clayton        return NULL;
18924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    return m_re.c_str();
19024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner}
19124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner
19224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//----------------------------------------------------------------------
19324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Free any contained compiled regular expressions.
19424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//----------------------------------------------------------------------
19524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnervoid
19624943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::Free()
19724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{
19824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    if (m_comp_err == 0)
19924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    {
20024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        m_re.clear();
20124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        regfree(&m_preg);
20224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        // Set a compile error since we no longer have a valid regex
20324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner        m_comp_err = 1;
20424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner    }
20524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner}
2066bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton
2076bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Claytonsize_t
2086bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg ClaytonRegularExpression::GetErrorAsCString (char *err_str, size_t err_str_max_len) const
2096bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton{
2106bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton    if (m_comp_err == 0)
2116bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton    {
2126bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton        if (err_str && err_str_max_len)
2136bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton            *err_str = '\0';
2146bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton        return 0;
2156bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton    }
2166bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton
2176bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton    return ::regerror (m_comp_err, &m_preg, err_str, err_str_max_len);
2186bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton}
2196bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton
220886bc3e5cb48e9660692609a7be69ec15b898bd7Enrico Granatabool
221886bc3e5cb48e9660692609a7be69ec15b898bd7Enrico GranataRegularExpression::operator < (const RegularExpression& rhs) const
222886bc3e5cb48e9660692609a7be69ec15b898bd7Enrico Granata{
223886bc3e5cb48e9660692609a7be69ec15b898bd7Enrico Granata    return (m_re < rhs.m_re);
224886bc3e5cb48e9660692609a7be69ec15b898bd7Enrico Granata}
2256bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton
226