1ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin//===- llvm/unittest/Support/RegexTest.cpp - Regex tests --===// 2ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// 3ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// The LLVM Compiler Infrastructure 4ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// 5ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// This file is distributed under the University of Illinois Open Source 6ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// License. See LICENSE.TXT for details. 7ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin// 8ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin//===----------------------------------------------------------------------===// 9ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 10ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include "llvm/Support/Regex.h" 11528700863adefca8de461ce28a7d903729fb96b4Chris Lattner#include "llvm/ADT/SmallVector.h" 125a88dda4be791426ab4d20a6a6c9c65d66614a27Chandler Carruth#include "gtest/gtest.h" 13ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include <cstring> 14ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 15ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinusing namespace llvm; 16ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinnamespace { 17ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 18ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinclass RegexTest : public ::testing::Test { 19ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin}; 20ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 21ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok EdwinTEST_F(RegexTest, Basics) { 22ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin Regex r1("^[0-9]+$"); 23ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_TRUE(r1.match("916")); 24ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_TRUE(r1.match("9")); 25ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_FALSE(r1.match("9a")); 26ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 27ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin SmallVector<StringRef, 1> Matches; 2881f46d9ce1888308b33336f9bea72147430da36bChris Lattner Regex r2("[0-9]+"); 29ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_TRUE(r2.match("aa216b", &Matches)); 30ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ(1u, Matches.size()); 31ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ("216", Matches[0].str()); 32ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 3381f46d9ce1888308b33336f9bea72147430da36bChris Lattner Regex r3("[0-9]+([a-f])?:([0-9]+)"); 34ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_TRUE(r3.match("9a:513b", &Matches)); 35ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ(3u, Matches.size()); 36ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ("9a:513", Matches[0].str()); 37ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ("a", Matches[1].str()); 38ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ("513", Matches[2].str()); 39ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 40ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_TRUE(r3.match("9:513b", &Matches)); 41ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ(3u, Matches.size()); 42ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ("9:513", Matches[0].str()); 43ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ("", Matches[1].str()); 44ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ("513", Matches[2].str()); 45ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 4681f46d9ce1888308b33336f9bea72147430da36bChris Lattner Regex r4("a[^b]+b"); 47ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin std::string String="axxb"; 48ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin String[2] = '\0'; 49ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_FALSE(r4.match("abb")); 50ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_TRUE(r4.match(String, &Matches)); 51ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ(1u, Matches.size()); 52ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_EQ(String, Matches[0].str()); 53ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 54ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)"; 55ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin String="YX99a:513b"; 56ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin NulPattern[7] = '\0'; 5781f46d9ce1888308b33336f9bea72147430da36bChris Lattner Regex r5(NulPattern); 58ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_FALSE(r5.match(String)); 59ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_FALSE(r5.match("X9")); 60ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin String[3]='\0'; 61ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin EXPECT_TRUE(r5.match(String)); 62ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin} 63ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 646b731486d4460e5f1088a6066c0081af048c1e45Eli BenderskyTEST_F(RegexTest, Backreferences) { 656b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky Regex r1("([a-z]+)_\\1"); 666b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky SmallVector<StringRef, 4> Matches; 676b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_TRUE(r1.match("abc_abc", &Matches)); 686b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_EQ(2u, Matches.size()); 696b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_FALSE(r1.match("abc_ab", &Matches)); 706b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky 716b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky Regex r2("a([0-9])b\\1c\\1"); 726b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_TRUE(r2.match("a4b4c4", &Matches)); 736b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_EQ(2u, Matches.size()); 746b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_EQ("4", Matches[1].str()); 756b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_FALSE(r2.match("a2b2c3")); 766b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky 776b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky Regex r3("a([0-9])([a-z])b\\1\\2"); 786b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_TRUE(r3.match("a6zb6z", &Matches)); 796b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_EQ(3u, Matches.size()); 806b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_EQ("6", Matches[1].str()); 816b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_EQ("z", Matches[2].str()); 826b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_FALSE(r3.match("a6zb6y")); 836b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky EXPECT_FALSE(r3.match("a6zb7z")); 846b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky} 856b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky 86d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel DunbarTEST_F(RegexTest, Substitution) { 87d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar std::string Error; 88d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar 89d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber")); 90d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar 91d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar // Standard Escapes 92d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error)); 9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EXPECT_EQ("", Error); 94d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error)); 9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EXPECT_EQ("", Error); 96d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error)); 9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EXPECT_EQ("", Error); 98d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error)); 9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EXPECT_EQ("", Error); 100d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar 101d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error)); 102d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ(Error, "replacement string contained trailing backslash"); 103d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar 104d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar // Backreferences 105d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error)); 10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EXPECT_EQ("", Error); 107d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar 108d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error)); 10936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EXPECT_EQ("", Error); 110d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar 111d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error)); 112d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar EXPECT_EQ(Error, "invalid backreference string '100'"); 113d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar} 114d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar 115aa80e61b0d79ddf9593f6217063574d0c66c3099Peter CollingbourneTEST_F(RegexTest, IsLiteralERE) { 116aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_TRUE(Regex::isLiteralERE("abc")); 117aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("a(bc)")); 118aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("^abc")); 119aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("abc$")); 120aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("a|bc")); 121aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("abc*")); 122aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("abc+")); 123aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("abc?")); 124aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("abc.")); 125aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("a[bc]")); 126aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("abc\\1")); 127aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne EXPECT_FALSE(Regex::isLiteralERE("abc{1,2}")); 128aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne} 129aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne 13036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesTEST_F(RegexTest, Escape) { 13136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EXPECT_EQ("a\\[bc\\]", Regex::escape("a[bc]")); 13236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EXPECT_EQ("abc\\{1\\\\,2\\}", Regex::escape("abc{1\\,2}")); 13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 13436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 135783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey SamsonovTEST_F(RegexTest, IsValid) { 136783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov std::string Error; 137783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov EXPECT_FALSE(Regex("(foo").isValid(Error)); 138783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov EXPECT_EQ("parentheses not balanced", Error); 139783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov EXPECT_FALSE(Regex("a[b-").isValid(Error)); 140783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov EXPECT_EQ("invalid character range", Error); 141783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov} 142783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov 14336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesTEST_F(RegexTest, MoveConstruct) { 14436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Regex r1("^[0-9]+$"); 14536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Regex r2(std::move(r1)); 14636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EXPECT_TRUE(r2.match("916")); 14736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 14836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 14936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesTEST_F(RegexTest, MoveAssign) { 15036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Regex r1("^[0-9]+$"); 15136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Regex r2("abc"); 15236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines r2 = std::move(r1); 15336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EXPECT_TRUE(r2.match("916")); 15436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 15536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 156ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin} 157