RegularExpression.cpp revision 73844aa19a7360b662e2be710fc3c969d6c86606
124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//===-- RegularExpression.cpp -----------------------------------*- C++ -*-===// 224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// 324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// The LLVM Compiler Infrastructure 424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// 524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// This file is distributed under the University of Illinois Open Source 624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// License. See LICENSE.TXT for details. 724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// 824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//===----------------------------------------------------------------------===// 924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 1024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner#include "lldb/Core/RegularExpression.h" 11a408326b499c3ffdfed2378738598c4ad0cf745fEli Friedman#include <string.h> 1224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 1324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerusing namespace lldb_private; 1424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 1524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 1624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Default constructor 1724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 1824943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::RegularExpression() : 1924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_re(), 2024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_comp_err (1), 2124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_preg(), 2203c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham m_compile_flags(REG_EXTENDED), 2324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_matches() 2424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{ 2524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner memset(&m_preg,0,sizeof(m_preg)); 2624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner} 2724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 2824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 2924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Constructor that compiles "re" using "flags" and stores the 3024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// resulting compiled regular expression into this object. 3124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 3224943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::RegularExpression(const char* re, int flags) : 3324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_re(), 3424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_comp_err (1), 3597c8957257a3e0b3ce6f46f8e5a28c965e30f357Daniel Dunbar m_preg(), 3697c8957257a3e0b3ce6f46f8e5a28c965e30f357Daniel Dunbar m_compile_flags(flags) 3724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{ 3824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner memset(&m_preg,0,sizeof(m_preg)); 3903c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham Compile(re); 4024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner} 4124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 4224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 4303c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham// Constructor that compiles "re" using "flags" and stores the 4403c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham// resulting compiled regular expression into this object. 4503c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham//---------------------------------------------------------------------- 4603c8ee5aeafcd6c43f10002a4f8096af01780f86Jim InghamRegularExpression::RegularExpression(const char* re) : 4703c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham m_re(), 4803c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham m_comp_err (1), 4997c8957257a3e0b3ce6f46f8e5a28c965e30f357Daniel Dunbar m_preg(), 5097c8957257a3e0b3ce6f46f8e5a28c965e30f357Daniel Dunbar m_compile_flags(REG_EXTENDED) 5103c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham{ 5203c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham memset(&m_preg,0,sizeof(m_preg)); 5303c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham Compile(re); 5403c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham} 5503c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham 5603c8ee5aeafcd6c43f10002a4f8096af01780f86Jim InghamRegularExpression::RegularExpression(const RegularExpression &rhs) 5703c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham{ 5803c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham memset(&m_preg,0,sizeof(m_preg)); 5903c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham Compile(rhs.GetText(), rhs.GetCompileFlags()); 6003c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham} 6103c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham 6203c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Inghamconst RegularExpression & 6303c8ee5aeafcd6c43f10002a4f8096af01780f86Jim InghamRegularExpression::operator= (const RegularExpression &rhs) 6403c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham{ 6503c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham if (&rhs != this) 6603c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham { 6703c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham Compile (rhs.GetText(), rhs.GetCompileFlags()); 6803c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham } 6903c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham return *this; 7003c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham} 7103c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham//---------------------------------------------------------------------- 7224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Destructor 7324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// 7424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Any previosuly compiled regular expression contained in this 7524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// object will be freed. 7624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 7724943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::~RegularExpression() 7824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{ 7924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner Free(); 8024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner} 8124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 8224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 8324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Compile a regular expression using the supplied regular 8424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// expression text and flags. The compied regular expression lives 8524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// in this object so that it can be readily used for regular 8624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// expression matches. Execute() can be called after the regular 8724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// expression is compiled. Any previosuly compiled regular 8824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// expression contained in this object will be freed. 8924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// 9024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// RETURNS 9124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// True of the refular expression compiles successfully, false 9224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// otherwise. 9324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 9424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerbool 9503c8ee5aeafcd6c43f10002a4f8096af01780f86Jim InghamRegularExpression::Compile(const char* re) 9603c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham{ 9703c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham return Compile (re, m_compile_flags); 9803c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham} 9903c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham 10003c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Inghambool 10124943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::Compile(const char* re, int flags) 10224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{ 10324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner Free(); 10403c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham m_compile_flags = flags; 10503c8ee5aeafcd6c43f10002a4f8096af01780f86Jim Ingham 10624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner if (re && re[0]) 10724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner { 10824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_re = re; 10924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_comp_err = ::regcomp (&m_preg, re, flags); 11024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner } 11124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner else 11224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner { 11324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner // No valid regular expression 11424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_comp_err = 1; 11524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner } 11624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 11724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner return m_comp_err == 0; 11824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner} 11924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 12024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 12124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Execute a regular expression match using the compiled regular 12224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// expression that is already in this object against the match 12324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// string "s". If any parens are used for regular expression 12424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// matches "match_count" should indicate the number of regmatch_t 12524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// values that are present in "match_ptr". The regular expression 12624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// will be executed using the "execute_flags". 12724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 12824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerbool 12924943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::Execute(const char* s, size_t num_matches, int execute_flags) const 13024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{ 13124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner int match_result = 1; 13224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner if (m_comp_err == 0) 13324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner { 13424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner if (num_matches > 0) 13524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_matches.resize(num_matches + 1); 13624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner else 13724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_matches.clear(); 13824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 13924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner match_result = ::regexec (&m_preg, 14024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner s, 14124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_matches.size(), 142928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton &m_matches[0], 14324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner execute_flags); 14424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner } 14524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner return match_result == 0; 14624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner} 14724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 14824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerbool 14924943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::GetMatchAtIndex (const char* s, uint32_t idx, std::string& match_str) const 15024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{ 15124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner if (idx <= m_preg.re_nsub && idx < m_matches.size()) 15224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner { 153928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton if (m_matches[idx].rm_eo == m_matches[idx].rm_so) 154928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton { 155928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton // Matched the empty string... 156928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton match_str.clear(); 157928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton return true; 158928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton } 159928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton else if (m_matches[idx].rm_eo > m_matches[idx].rm_so) 160928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton { 161928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton match_str.assign (s + m_matches[idx].rm_so, 162928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton m_matches[idx].rm_eo - m_matches[idx].rm_so); 163928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton return true; 164928d130789bd8ee38ce434ccb2d564e6069cf018Greg Clayton } 16524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner } 16624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner return false; 16724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner} 16824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 16924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 17024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 17124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Returns true if the regular expression compiled and is ready 17224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// for execution. 17324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 17424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerbool 17524943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::IsValid () const 17624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{ 17724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner return m_comp_err == 0; 17824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner} 17924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 18024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 18124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Returns the text that was used to compile the current regular 18224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// expression. 18324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 18424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnerconst char* 18524943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::GetText () const 18624943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{ 18773844aa19a7360b662e2be710fc3c969d6c86606Greg Clayton if (m_re.empty()) 18873844aa19a7360b662e2be710fc3c969d6c86606Greg Clayton return NULL; 18924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner return m_re.c_str(); 19024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner} 19124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner 19224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 19324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner// Free any contained compiled regular expressions. 19424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner//---------------------------------------------------------------------- 19524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattnervoid 19624943d2ee8bfaa7cf5893e4709143924157a5c1eChris LattnerRegularExpression::Free() 19724943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner{ 19824943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner if (m_comp_err == 0) 19924943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner { 20024943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_re.clear(); 20124943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner regfree(&m_preg); 20224943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner // Set a compile error since we no longer have a valid regex 20324943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner m_comp_err = 1; 20424943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner } 20524943d2ee8bfaa7cf5893e4709143924157a5c1eChris Lattner} 2066bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton 2076bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Claytonsize_t 2086bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg ClaytonRegularExpression::GetErrorAsCString (char *err_str, size_t err_str_max_len) const 2096bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton{ 2106bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton if (m_comp_err == 0) 2116bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton { 2126bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton if (err_str && err_str_max_len) 2136bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton *err_str = '\0'; 2146bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton return 0; 2156bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton } 2166bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton 2176bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton return ::regerror (m_comp_err, &m_preg, err_str, err_str_max_len); 2186bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton} 2196bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton 220886bc3e5cb48e9660692609a7be69ec15b898bd7Enrico Granatabool 221886bc3e5cb48e9660692609a7be69ec15b898bd7Enrico GranataRegularExpression::operator < (const RegularExpression& rhs) const 222886bc3e5cb48e9660692609a7be69ec15b898bd7Enrico Granata{ 223886bc3e5cb48e9660692609a7be69ec15b898bd7Enrico Granata return (m_re < rhs.m_re); 224886bc3e5cb48e9660692609a7be69ec15b898bd7Enrico Granata} 2256bc0b5d69f6f5f46055be6cfea6f9a0eb11b1943Greg Clayton 226