//===- llvm/unittest/Support/RegexTest.cpp - Regex tests --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
10ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include "llvm/Support/Regex.h"
11528700863adefca8de461ce28a7d903729fb96b4Chris Lattner#include "llvm/ADT/SmallVector.h"
125a88dda4be791426ab4d20a6a6c9c65d66614a27Chandler Carruth#include "gtest/gtest.h"
13ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include <cstring>
14ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
15ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinusing namespace llvm;
16ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinnamespace {
17ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
18ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinclass RegexTest : public ::testing::Test {
19ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin};
20ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
21ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok EdwinTEST_F(RegexTest, Basics) {
22ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  Regex r1("^[0-9]+$");
23ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_TRUE(r1.match("916"));
24ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_TRUE(r1.match("9"));
25ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_FALSE(r1.match("9a"));
26ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
27ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  SmallVector<StringRef, 1> Matches;
2881f46d9ce1888308b33336f9bea72147430da36bChris Lattner  Regex r2("[0-9]+");
29ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_TRUE(r2.match("aa216b", &Matches));
30ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ(1u, Matches.size());
31ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ("216", Matches[0].str());
32ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
3381f46d9ce1888308b33336f9bea72147430da36bChris Lattner  Regex r3("[0-9]+([a-f])?:([0-9]+)");
34ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_TRUE(r3.match("9a:513b", &Matches));
35ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ(3u, Matches.size());
36ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ("9a:513", Matches[0].str());
37ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ("a", Matches[1].str());
38ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ("513", Matches[2].str());
39ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
40ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_TRUE(r3.match("9:513b", &Matches));
41ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ(3u, Matches.size());
42ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ("9:513", Matches[0].str());
43ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ("", Matches[1].str());
44ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ("513", Matches[2].str());
45ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
4681f46d9ce1888308b33336f9bea72147430da36bChris Lattner  Regex r4("a[^b]+b");
47ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  std::string String="axxb";
48ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  String[2] = '\0';
49ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_FALSE(r4.match("abb"));
50ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_TRUE(r4.match(String, &Matches));
51ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ(1u, Matches.size());
52ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_EQ(String, Matches[0].str());
53ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
54ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)";
55ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  String="YX99a:513b";
56ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  NulPattern[7] = '\0';
5781f46d9ce1888308b33336f9bea72147430da36bChris Lattner  Regex r5(NulPattern);
58ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_FALSE(r5.match(String));
59ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_FALSE(r5.match("X9"));
60ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  String[3]='\0';
61ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin  EXPECT_TRUE(r5.match(String));
62ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin}
63ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin
646b731486d4460e5f1088a6066c0081af048c1e45Eli BenderskyTEST_F(RegexTest, Backreferences) {
656b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  Regex r1("([a-z]+)_\\1");
666b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  SmallVector<StringRef, 4> Matches;
676b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_TRUE(r1.match("abc_abc", &Matches));
686b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_EQ(2u, Matches.size());
696b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_FALSE(r1.match("abc_ab", &Matches));
706b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky
716b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  Regex r2("a([0-9])b\\1c\\1");
726b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_TRUE(r2.match("a4b4c4", &Matches));
736b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_EQ(2u, Matches.size());
746b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_EQ("4", Matches[1].str());
756b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_FALSE(r2.match("a2b2c3"));
766b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky
776b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  Regex r3("a([0-9])([a-z])b\\1\\2");
786b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_TRUE(r3.match("a6zb6z", &Matches));
796b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_EQ(3u, Matches.size());
806b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_EQ("6", Matches[1].str());
816b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_EQ("z", Matches[2].str());
826b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_FALSE(r3.match("a6zb6y"));
836b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky  EXPECT_FALSE(r3.match("a6zb7z"));
846b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky}
856b731486d4460e5f1088a6066c0081af048c1e45Eli Bendersky
86d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel DunbarTEST_F(RegexTest, Substitution) {
87d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  std::string Error;
88d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar
89d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber"));
90d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar
91d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  // Standard Escapes
92d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error));
9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EXPECT_EQ("", Error);
94d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error));
9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EXPECT_EQ("", Error);
96d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error));
9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EXPECT_EQ("", Error);
98d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error));
9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EXPECT_EQ("", Error);
100d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar
101d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error));
102d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ(Error, "replacement string contained trailing backslash");
103d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar
104d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  // Backreferences
105d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error));
10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EXPECT_EQ("", Error);
107d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar
108d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error));
10936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EXPECT_EQ("", Error);
110d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar
111d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
112d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar  EXPECT_EQ(Error, "invalid backreference string '100'");
113d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar}
114d2a5c0d8562407f9acab97451a785b513edd4c9bDaniel Dunbar
115aa80e61b0d79ddf9593f6217063574d0c66c3099Peter CollingbourneTEST_F(RegexTest, IsLiteralERE) {
116aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_TRUE(Regex::isLiteralERE("abc"));
117aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("a(bc)"));
118aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("^abc"));
119aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("abc$"));
120aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("a|bc"));
121aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("abc*"));
122aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("abc+"));
123aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("abc?"));
124aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("abc."));
125aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("a[bc]"));
126aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("abc\\1"));
127aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne  EXPECT_FALSE(Regex::isLiteralERE("abc{1,2}"));
128aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne}
129aa80e61b0d79ddf9593f6217063574d0c66c3099Peter Collingbourne
13036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesTEST_F(RegexTest, Escape) {
13136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EXPECT_EQ("a\\[bc\\]", Regex::escape("a[bc]"));
13236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EXPECT_EQ("abc\\{1\\\\,2\\}", Regex::escape("abc{1\\,2}"));
13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
13436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
135783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey SamsonovTEST_F(RegexTest, IsValid) {
136783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov  std::string Error;
137783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov  EXPECT_FALSE(Regex("(foo").isValid(Error));
138783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov  EXPECT_EQ("parentheses not balanced", Error);
139783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov  EXPECT_FALSE(Regex("a[b-").isValid(Error));
140783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov  EXPECT_EQ("invalid character range", Error);
141783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov}
142783a0387c5eef62ff50950aa3e977b2652a3c3a5Alexey Samsonov
14336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesTEST_F(RegexTest, MoveConstruct) {
14436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  Regex r1("^[0-9]+$");
14536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  Regex r2(std::move(r1));
14636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EXPECT_TRUE(r2.match("916"));
14736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
14836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
14936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesTEST_F(RegexTest, MoveAssign) {
15036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  Regex r1("^[0-9]+$");
15136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  Regex r2("abc");
15236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  r2 = std::move(r1);
15336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EXPECT_TRUE(r2.match("916"));
15436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
15536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
156ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin}
157