1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "testing/gtest/include/gtest/gtest.h"
6#include "third_party/WebKit/Source/WebKit/chromium/public/WebRegularExpression.h"
7#include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h"
8#include "third_party/WebKit/Source/WebKit/chromium/public/WebTextCaseSensitivity.h"
9
10using namespace WebKit;
11
12namespace {
13
14class RegexTest : public testing::Test {
15};
16
17struct Match {
18  const WebUChar* text;
19  const int textLength;
20  const int matchPosition;
21  const int matchLength;
22};
23
24void testMatches(const WebRegularExpression& regex,
25                 const Match* matches,
26                 const size_t nMatches) {
27
28  for (size_t i = 0; i < nMatches; ++i) {
29    int matchedLength = matches[i].textLength;
30    EXPECT_EQ(matches[i].matchPosition, regex.match(
31        WebString(matches[i].text, matches[i].textLength), 0, &matchedLength));
32    if (matches[i].matchPosition != -1)
33      EXPECT_EQ(matches[i].matchLength, matchedLength);
34  }
35
36}
37
38}  // namespace
39
// Expands to a brace initializer for a Match: the buffer |webuchar|, its
// element count as computed by arraysize(), and the expected match position
// and length. |webuchar| should name an array (not a pointer) so that the
// element count is meaningful.
#define MATCH_DESC(webuchar, matchPosition, matchLength) \
  { (webuchar), arraysize(webuchar), (matchPosition), (matchLength) }
42
43
44TEST(RegexTest, Basic) {
45  // Just make sure we're not completely broken.
46  WebRegularExpression regex("the quick brown fox", WebTextCaseSensitive);
47  EXPECT_EQ(0, regex.match("the quick brown fox"));
48  EXPECT_EQ(1, regex.match(" the quick brown fox"));
49  EXPECT_EQ(3, regex.match("foothe quick brown foxbar"));
50
51  EXPECT_EQ(-1, regex.match("The quick brown FOX"));
52  EXPECT_EQ(-1, regex.match("the quick brown fo"));
53}
54
55TEST(RegexTest, Unicode) {
56  // Make sure we get the right offsets for unicode strings.
57  WebUChar pattern[] = {L'\x6240', L'\x6709', L'\x7f51', L'\x9875'};
58  WebRegularExpression regex(WebString(pattern, arraysize(pattern)),
59                             WebTextCaseInsensitive);
60
61  WebUChar text1[] = {L'\x6240', L'\x6709', L'\x7f51', L'\x9875'};
62  WebUChar text2[] = {L' ', L'\x6240', L'\x6709', L'\x7f51', L'\x9875'};
63  WebUChar text3[] = {L'f', L'o', L'o', L'\x6240', L'\x6709', L'\x7f51', L'\x9875', L'b', L'a', L'r'};
64  WebUChar text4[] = {L'\x4e2d', L'\x6587', L'\x7f51',  L'\x9875', L'\x6240', L'\x6709', L'\x7f51', L'\x9875'};
65
66  const Match matches[] = {
67    MATCH_DESC(text1, 0, 4),
68    MATCH_DESC(text2, 1, 4),
69    MATCH_DESC(text3, 3, 4),
70    MATCH_DESC(text4, 4, 4),
71  };
72
73  testMatches(regex, matches, arraysize(matches));
74}
75
76TEST(RegexTest, UnicodeMixedLength) {
77  WebUChar pattern[] = {L':', L'[', L' ', L'\x2000', L']', L'+', L':'};
78  WebRegularExpression regex(WebString(pattern, arraysize(pattern)),
79                             WebTextCaseInsensitive);
80
81  WebUChar text1[] = {L':', L' ', L' ', L':'};
82  WebUChar text2[] = {L' ', L' ', L':', L' ', L' ', L' ', L' ', L':', L' ', L' '};
83  WebUChar text3[] = {L' ', L':', L' ', L'\x2000', L' ', L':', L' '};
84  WebUChar text4[] = {L'\x6240', L'\x6709', L'\x7f51', L'\x9875', L' ', L':', L' ', L'\x2000', L' ', L'\x2000', L' ', L':', L' '};
85  WebUChar text5[] = {L' '};
86  WebUChar text6[] = {L':', L':'};
87
88  const Match matches[] = {
89    MATCH_DESC(text1, 0, 4),
90    MATCH_DESC(text2, 2, 6),
91    MATCH_DESC(text3, 1, 5),
92    MATCH_DESC(text4, 5, 7),
93    MATCH_DESC(text5, -1, -1),
94    MATCH_DESC(text6, -1, -1),
95  };
96
97  testMatches(regex, matches, arraysize(matches));
98}
99
100TEST(RegexTest, EmptyMatch) {
101  WebRegularExpression regex("|x", WebTextCaseInsensitive);
102  int matchedLength = 0;
103  EXPECT_EQ(0, regex.match("", 0, &matchedLength));
104  EXPECT_EQ(0, matchedLength);
105}
106