// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <math.h>
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <stdarg.h>
7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <limits>
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <sstream>
10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/basictypes.h"
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/string_util.h"
13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/utf_string_conversions.h"
14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "testing/gmock/include/gmock/gmock.h"
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "testing/gtest/include/gtest/gtest.h"
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochusing ::testing::ElementsAre;
18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace base {
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const struct trim_case {
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const wchar_t* input;
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const TrimPositions positions;
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const wchar_t* output;
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const TrimPositions return_value;
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} trim_cases[] = {
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {L"", TRIM_ALL, L"", TRIM_NONE},
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {L"  ", TRIM_ALL, L"", TRIM_ALL},
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const struct trim_case_ascii {
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const char* input;
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const TrimPositions positions;
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const char* output;
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const TrimPositions return_value;
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} trim_cases_ascii[] = {
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {"", TRIM_ALL, "", TRIM_NONE},
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {"  ", TRIM_LEADING, "", TRIM_LEADING},
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {"  ", TRIM_ALL, "", TRIM_ALL},
53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace {
57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Helper used to test TruncateUTF8ToByteSize.
59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool Truncated(const std::string& input, const size_t byte_size,
60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch               std::string* output) {
61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    size_t prev = input.length();
62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    TruncateUTF8ToByteSize(input, byte_size, output);
63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return prev != output->length();
64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}  // namespace
67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
68c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(StringUtilTest, TruncateUTF8ToByteSize) {
69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string output;
70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Empty strings and invalid byte_size arguments
72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(Truncated("", 0, &output));
73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output, "");
74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output, "");
76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Testing the truncation of valid UTF8 correctly
80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("abc", 2, &output));
81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output, "ab");
82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\xc2\x81"), 0);
84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\xc2\x81"), 0);
86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  {
90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const std::string array_string(array, arraysize(array));
92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_TRUE(Truncated(array_string, 4, &output));
93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  {
97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const char array[] = "\x00\xc2\x81\xc2\x81";
98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const std::string array_string(array, arraysize(array));
99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_TRUE(Truncated(array_string, 4, &output));
100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Testing invalid UTF8
104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Testing invalid UTF8 mixed with valid UTF8
112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
114c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch              10, &output));
118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch              10, &output));
121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Overlong sequences
126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Beyond U+10FFFF (the upper limit of Unicode codespace)
150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  {
164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const char array[] = "\x00\x00\xfe\xff";
165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const std::string array_string(array, arraysize(array));
166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_TRUE(Truncated(array_string, 4, &output));
167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Variants on the previous test
171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  {
172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const char array[] = "\xff\xfe\x00\x00";
173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const std::string array_string(array, 4);
174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_FALSE(Truncated(array_string, 4, &output));
175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  {
178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const char array[] = "\xff\x00\x00\xfe";
179c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const std::string array_string(array, arraysize(array));
180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_TRUE(Truncated(array_string, 4, &output));
181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
182c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
186c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
187c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
189c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
190c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
192c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
193c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
194c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
195c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
196c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Strings in legacy encodings that are valid in UTF-8, but
197c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // are invalid as UTF-8 in real data.
198c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
199c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("caf"), 0);
200c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
201c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
202c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
203c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
204c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
205c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch              &output));
206c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
207c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
208c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Testing using the same string as input and output.
209c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(Truncated(output, 4, &output));
210c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
211c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated(output, 3, &output));
212c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\xa7\x41"), 0);
213c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
214c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // "abc" with U+201[CD] in windows-125[0-8]
215c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
216c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare("\x93" "abc"), 0);
217c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
218c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
219c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
220c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
221c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
222c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
223c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
224c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(output.compare(""), 0);
225c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
226c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, TrimWhitespace) {
228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::wstring output;  // Allow contents to carry over to next testcase
229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < arraysize(trim_cases); ++i) {
230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const trim_case& value = trim_cases[i];
231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(value.return_value,
232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott              TrimWhitespace(value.input, value.positions, &output));
233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(value.output, output);
234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Test that TrimWhitespace() can take the same string for input and output
237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  output = L"  This is a test \r\n";
238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(L"This is a test", output);
240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Once more, but with a string of whitespace
242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  output = L"  \r\n";
243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(L"", output);
245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string output_ascii;
247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const trim_case_ascii& value = trim_cases_ascii[i];
249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(value.return_value,
250c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott              TrimWhitespace(value.input, value.positions, &output_ascii));
251c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(value.output, output_ascii);
252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Wide-string test vectors for CollapseWhitespace(). Each entry holds the
// input, the |trim| flag passed as the second argument, and the expected
// result. In the |trim| == true entries, whitespace runs that contain a CR or
// LF are expected to disappear entirely instead of collapsing to one space
// (see the last entry).
static const struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
} collapse_cases[] = {
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L"  ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  // Non-ASCII whitespace (U+2002, U+00A0, U+3000, U+1680, U+2028).
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L"    Test     \n  \t String    ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L"   Test String", false, L"Test String"},
  {L"Test String    ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  {L"", true, L""},
  {L"\n", true, L""},
  {L"  \r  ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r  Foo  ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L"  \tFoo  bar  \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
280c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
281c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, CollapseWhitespace) {
282c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
283c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const collapse_case& value = collapse_cases[i];
284c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
285c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
286c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
287c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Narrow-string test vectors for CollapseWhitespaceASCII(): input, the |trim|
// flag passed as the second argument, and the expected result. As in the last
// entry, |trim| == true expects whitespace runs containing a CR or LF to be
// removed entirely.
static const struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
} collapse_cases_ascii[] = {
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {"  ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {"    Test     \n  \t String    ", false, "Test String"},
  {"   Test String", false, "Test String"},
  {"Test String    ", false, "Test String"},
  {"Test String", false, "Test String"},
  {"", true, ""},
  {"\n", true, ""},
  {"  \r  ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r  Foo  ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {"  \tFoo  bar  \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
311c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
312c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, CollapseWhitespaceASCII) {
313c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
314c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const collapse_case_ascii& value = collapse_cases_ascii[i];
315c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
316c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
317c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
318c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
319c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
320c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
321c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
322c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
323c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
324c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
325c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
326c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
327c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
328c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, ContainsOnlyWhitespace) {
329c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("")));
330c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
331c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
332c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
333c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
334c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
335c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
336c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
337c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, IsStringUTF8) {
338c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(IsStringUTF8("abc"));
339c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
340c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
341c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
342c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
343c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
344c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
345c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // surrogate code points
346c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
347c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
348c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
349c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
350c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // overlong sequences
351c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
352c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
353c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
354c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
355c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
356c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
357c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
358c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
359c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
360c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
361c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
362c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
363c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Beyond U+10FFFF (the upper limit of Unicode codespace)
364c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
365c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
366c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
367c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
368c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
369c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
370c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
371c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
372c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
373c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
374c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
375c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
376c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
377c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
378c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
379c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
380c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Strings in legacy encodings. We can certainly make up strings
381c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // in a legacy encoding that are valid in UTF-8, but in real data,
382c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // most of them are invalid as UTF-8.
383c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
384c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
385c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
386c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // "abc" with U+201[CD] in windows-125[0-8]
387c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
388c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
389c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
390c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
391c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
3923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
3933345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Check that we support Embedded Nulls. The first uses the canonical UTF-8
3943345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // representation, and the second uses a 2-byte sequence. The second version
3953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
3963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // given codepoint must be used.
3973345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  static const char kEmbeddedNull[] = "embedded\0null";
3983345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(IsStringUTF8(
3993345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
4003345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
401c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
402c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
403c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, ConvertASCII) {
404c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static const char* char_cases[] = {
405c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    "Google Video",
406c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    "Hello, world\n",
407c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    "0123ABCDwxyz \a\b\t\r\n!+,.~"
408c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
409c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
410c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static const wchar_t* const wchar_cases[] = {
411c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    L"Google Video",
412c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    L"Hello, world\n",
413c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
414c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
415c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
416c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < arraysize(char_cases); ++i) {
417c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_TRUE(IsStringASCII(char_cases[i]));
418c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    std::wstring wide = ASCIIToWide(char_cases[i]);
419c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(wchar_cases[i], wide);
420c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
421c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
422c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    std::string ascii = WideToASCII(wchar_cases[i]);
423c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(char_cases[i], ascii);
424c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
425c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
426c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringASCII("Google \x80Video"));
427c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
428c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
429c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Convert empty strings.
430c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::wstring wempty;
431c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string empty;
432c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(empty, WideToASCII(wempty));
433c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(wempty, ASCIIToWide(empty));
434c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
435c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Convert strings with an embedded NUL character.
436c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const char chars_with_nul[] = "test\0string";
437c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const int length_with_nul = arraysize(chars_with_nul) - 1;
438c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string string_with_nul(chars_with_nul, length_with_nul);
439c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
440c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
441c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            wide_with_nul.length());
442c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string narrow_with_nul = WideToASCII(wide_with_nul);
443c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
444c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            narrow_with_nul.length());
445c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
446c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
447c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
448c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, ToUpperASCII) {
449c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ('C', ToUpperASCII('C'));
450c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ('C', ToUpperASCII('c'));
451c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ('2', ToUpperASCII('2'));
452c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
453c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(L'C', ToUpperASCII(L'C'));
454c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(L'C', ToUpperASCII(L'c'));
455c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(L'2', ToUpperASCII(L'2'));
456c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
457c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string in_place_a("Cc2");
458c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  StringToUpperASCII(&in_place_a);
459c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ("CC2", in_place_a);
460c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
461c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::wstring in_place_w(L"Cc2");
462c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  StringToUpperASCII(&in_place_w);
463c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(L"CC2", in_place_w);
464c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
465c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string original_a("Cc2");
466c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string upper_a = StringToUpperASCII(original_a);
467c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ("CC2", upper_a);
468c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
469c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::wstring original_w(L"Cc2");
470c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::wstring upper_w = StringToUpperASCII(original_w);
471c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(L"CC2", upper_w);
472c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
473c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Inputs for the LowerCaseEqualsASCII test. Each row carries the same text as
// a wide and a narrow string, plus the lower-case ASCII string it is compared
// against.
static const struct {
  const wchar_t* src_w;  // Wide input.
  const char* src_a;     // Narrow input.
  const char* dst;       // Lower-case ASCII comparison string.
} lowercase_cases[] = {
  { L"FoO", "FoO", "foo" },
  { L"foo", "foo", "foo" },
  { L"FOO", "FOO", "foo" },
};
483c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
484c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, LowerCaseEqualsASCII) {
485c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
486c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
487c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                     lowercase_cases[i].dst));
488c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
489c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                     lowercase_cases[i].dst));
490c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
491c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
492c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
493c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, GetByteDisplayUnits) {
494c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static const struct {
495c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int64 bytes;
496c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DataUnits expected;
497c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } cases[] = {
498c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {0, DATA_UNITS_BYTE},
499c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {512, DATA_UNITS_BYTE},
500c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {10*1024, DATA_UNITS_KIBIBYTE},
501c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {10*1024*1024, DATA_UNITS_MEBIBYTE},
502c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE},
503c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {~(1LL<<63), DATA_UNITS_GIBIBYTE},
504c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifdef NDEBUG
505c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {-1, DATA_UNITS_BYTE},
506c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif
507c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
508c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
509c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
510c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
511c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
512c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
513c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, FormatBytes) {
514c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static const struct {
515c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int64 bytes;
516c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DataUnits units;
5173345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const char* expected;
5183345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const char* expected_with_units;
519c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } cases[] = {
520c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Expected behavior: we show one post-decimal digit when we have
521c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // under two pre-decimal digits, except in cases where it makes no
522c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // sense (zero or bytes).
523c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Since we switch units once we cross the 1000 mark, this keeps
524c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // the display of file sizes or bytes consistently around three
525c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // digits.
5263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {0, DATA_UNITS_BYTE, "0", "0 B"},
5273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {512, DATA_UNITS_BYTE, "512", "512 B"},
5283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {512, DATA_UNITS_KIBIBYTE, "0.5", "0.5 kB"},
5293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {1024*1024, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
5303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {1024*1024, DATA_UNITS_MEBIBYTE, "1.0", "1.0 MB"},
5313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {1024*1024*1024, DATA_UNITS_GIBIBYTE, "1.0", "1.0 GB"},
5323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
5333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "99.0", "99.0 GB"},
5343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "105", "105 GB"},
535c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE,
5363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick     "105", "105 GB"},
5373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {~(1LL<<63), DATA_UNITS_GIBIBYTE, "8589934592", "8589934592 GB"},
538c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
5393345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {99*1024 + 103, DATA_UNITS_KIBIBYTE, "99.1", "99.1 kB"},
5403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {1024*1024 + 103, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
5413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, "1.2", "1.2 MB"},
542c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE,
5433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick     "1.9", "1.9 GB"},
5443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
5453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "100", "100 GB"},
546c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifdef NDEBUG
5473345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    {-1, DATA_UNITS_BYTE, "", ""},
548c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif
549c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
550c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
551c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
5523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
553c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott              FormatBytes(cases[i].bytes, cases[i].units, false));
5543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    EXPECT_EQ(ASCIIToUTF16(cases[i].expected_with_units),
555c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott              FormatBytes(cases[i].bytes, cases[i].units, true));
556c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
557c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
558c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
559c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
560c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static const struct {
561c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char* str;
562c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    string16::size_type start_offset;
563c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char* find_this;
564c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char* replace_with;
565c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char* expected;
566c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } cases[] = {
567c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"aaa", 0, "a", "b", "bbb"},
568c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"abb", 0, "ab", "a", "ab"},
569c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
570c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"Not found", 0, "x", "0", "Not found"},
571c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"Not found again", 5, "x", "0", "Not found again"},
572c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {" Making it much longer ", 0, " ", "Four score and seven years ago",
573c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott     "Four score and seven years agoMakingFour score and seven years agoit"
574c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott     "Four score and seven years agomuchFour score and seven years agolonger"
575c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott     "Four score and seven years ago"},
576c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
577c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"Replace me only me once", 9, "me ", "", "Replace me only once"},
578c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"abababab", 2, "ab", "c", "abccc"},
579c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
580c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
581c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
582c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    string16 str = ASCIIToUTF16(cases[i].str);
583c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
584c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                 ASCIIToUTF16(cases[i].find_this),
585c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                 ASCIIToUTF16(cases[i].replace_with));
586c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
587c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
588c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
589c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
590c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
591c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static const struct {
592c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char* str;
593c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    string16::size_type start_offset;
594c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char* find_this;
595c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char* replace_with;
596c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char* expected;
597c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } cases[] = {
598c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"aaa", 0, "a", "b", "baa"},
599c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"abb", 0, "ab", "a", "ab"},
600c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"Removing some substrings inging", 0, "ing", "",
601c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      "Remov some substrings inging"},
602c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"Not found", 0, "x", "0", "Not found"},
603c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"Not found again", 5, "x", "0", "Not found again"},
604c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {" Making it much longer ", 0, " ", "Four score and seven years ago",
605c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott     "Four score and seven years agoMaking it much longer "},
606c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
607c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
608c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {"abababab", 2, "ab", "c", "abcabab"},
609c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
610c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
611c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
612c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    string16 str = ASCIIToUTF16(cases[i].str);
613c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
614c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                     ASCIIToUTF16(cases[i].find_this),
615c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                     ASCIIToUTF16(cases[i].replace_with));
616c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
617c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
618c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
619c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
6203345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickTEST(StringUtilTest, HexDigitToInt) {
6213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(0, HexDigitToInt('0'));
6223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(1, HexDigitToInt('1'));
6233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(2, HexDigitToInt('2'));
6243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(3, HexDigitToInt('3'));
6253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(4, HexDigitToInt('4'));
6263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(5, HexDigitToInt('5'));
6273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(6, HexDigitToInt('6'));
6283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(7, HexDigitToInt('7'));
6293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(8, HexDigitToInt('8'));
6303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(9, HexDigitToInt('9'));
6313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(10, HexDigitToInt('A'));
6323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(11, HexDigitToInt('B'));
6333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(12, HexDigitToInt('C'));
6343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(13, HexDigitToInt('D'));
6353345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(14, HexDigitToInt('E'));
6363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(15, HexDigitToInt('F'));
6373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
6383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Verify the lower case as well.
6393345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(10, HexDigitToInt('a'));
6403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(11, HexDigitToInt('b'));
6413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(12, HexDigitToInt('c'));
6423345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(13, HexDigitToInt('d'));
6433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(14, HexDigitToInt('e'));
6443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(15, HexDigitToInt('f'));
645c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
646c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
647c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// This checks where we can use the assignment operator for a va_list. We need
648c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// a way to do this since Visual C doesn't support va_copy, but assignment on
649c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// va_list is not guaranteed to be a copy. See StringAppendVT which uses this
650c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// capability.
651c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic void VariableArgsFunc(const char* format, ...) {
652c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  va_list org;
653c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  va_start(org, format);
654c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
655c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  va_list dup;
656c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  GG_VA_COPY(dup, org);
657c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int i1 = va_arg(org, int);
658c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int j1 = va_arg(org, int);
659c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  char* s1 = va_arg(org, char*);
660c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  double d1 = va_arg(org, double);
661c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  va_end(org);
662c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
663c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int i2 = va_arg(dup, int);
664c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int j2 = va_arg(dup, int);
665c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  char* s2 = va_arg(dup, char*);
666c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  double d2 = va_arg(dup, double);
667c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
668c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(i1, i2);
669c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(j1, j2);
670c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_STREQ(s1, s2);
671c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(d1, d2);
672c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
673c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  va_end(dup);
674c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
675c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
676c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, VAList) {
677c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
678c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
679c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
680c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Test for Tokenize
681c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochtemplate <typename STR>
682c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TokenizeTest() {
683c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::vector<STR> r;
684c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  size_t size;
685c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
686c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR("This is a string"), STR(" "), &r);
687c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(4U, size);
688c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(4U, r.size());
689c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[0], STR("This"));
690c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[1], STR("is"));
691c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[2], STR("a"));
692c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[3], STR("string"));
693c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
694c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
695c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR("one,two,three"), STR(","), &r);
696c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(3U, size);
697c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(3U, r.size());
698c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[0], STR("one"));
699c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[1], STR("two"));
700c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[2], STR("three"));
701c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
702c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
703c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
704c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(3U, size);
705c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(3U, r.size());
706c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[0], STR("one"));
707c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[1], STR("two"));
708c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[2], STR("three;four"));
709c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
710c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
711c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
712c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(4U, size);
713c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(4U, r.size());
714c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[0], STR("one"));
715c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[1], STR("two"));
716c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[2], STR("three"));
717c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[3], STR("four"));
718c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
719c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
720c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR("one, two, three"), STR(","), &r);
721c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(3U, size);
722c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(3U, r.size());
723c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[0], STR("one"));
724c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[1], STR(" two"));
725c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[2], STR(" three"));
726c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
727c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
728c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR("one, two, three, "), STR(","), &r);
729c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(4U, size);
730c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(4U, r.size());
731c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[0], STR("one"));
732c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[1], STR(" two"));
733c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[2], STR(" three"));
734c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[3], STR(" "));
735c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
736c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
737c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR("one, two, three,"), STR(","), &r);
738c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(3U, size);
739c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(3U, r.size());
740c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[0], STR("one"));
741c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[1], STR(" two"));
742c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[2], STR(" three"));
743c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
744c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
745c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR(""), STR(","), &r);
746c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(0U, size);
747c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(0U, r.size());
748c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
749c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
750c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR(","), STR(","), &r);
751c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(0U, size);
752c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(0U, r.size());
753c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
754c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
755c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR(",;:."), STR(".:;,"), &r);
756c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(0U, size);
757c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(0U, r.size());
758c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
759c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
760c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
761c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(1U, size);
762c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(1U, r.size());
763c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[0], STR("a"));
764c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
765c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
766c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
767c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(2U, size);
768c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ASSERT_EQ(2U, r.size());
769c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[0], STR("\ta\t"));
770c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(r[1], STR("b\tcc"));
771c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  r.clear();
772c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
773c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
774c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(StringUtilTest, TokenizeStdString) {
775c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  TokenizeTest<std::string>();
776c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
777c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
778c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(StringUtilTest, TokenizeStringPiece) {
779c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  TokenizeTest<base::StringPiece>();
780c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
781c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
782c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Test for JoinString
783c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, JoinString) {
784c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::vector<std::string> in;
785c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ("", JoinString(in, ','));
786c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
787c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  in.push_back("a");
788c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ("a", JoinString(in, ','));
789c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
790c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  in.push_back("b");
791c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  in.push_back("c");
792c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ("a,b,c", JoinString(in, ','));
793c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
794c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  in.push_back("");
795c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ("a,b,c,", JoinString(in, ','));
796c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  in.push_back(" ");
797c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
798c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
799c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
800c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, StartsWith) {
801c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
802c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
803c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
804c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
805c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
806c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
807c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(StartsWithASCII("", "javascript", false));
808c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(StartsWithASCII("", "javascript", true));
809c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(StartsWithASCII("java", "", false));
810c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(StartsWithASCII("java", "", true));
811c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
812c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
813c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
814c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
815c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
816c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
817c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
818c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(StartsWith(L"", L"javascript", false));
819c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(StartsWith(L"", L"javascript", true));
820c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(StartsWith(L"java", L"", false));
821c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(StartsWith(L"java", L"", true));
822c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
823c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
824c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, EndsWith) {
825c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
826c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
827c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
828c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
829c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
830c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
831c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
832c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
833c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(EndsWith(L"", L".plugin", false));
834c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_FALSE(EndsWith(L"", L".plugin", true));
835c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
836c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
837c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
838c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
839c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(EndsWith(L"", L"", false));
840c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_TRUE(EndsWith(L"", L"", true));
841c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
842c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
843c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, GetStringFWithOffsets) {
844c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::vector<string16> subst;
845c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("1"));
846c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("2"));
847c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::vector<size_t> offsets;
848c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
849c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
850c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            subst,
851c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            &offsets);
852c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(2U, offsets.size());
853c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(7U, offsets[0]);
854c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(25U, offsets[1]);
855c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  offsets.clear();
856c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
857c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
858c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            subst,
859c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            &offsets);
860c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(2U, offsets.size());
861c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(25U, offsets[0]);
862c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(7U, offsets[1]);
863c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  offsets.clear();
864c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
865c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
866c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, ReplaceStringPlaceholders) {
867c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::vector<string16> subst;
868c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("9a"));
869c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("8b"));
870c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("7c"));
871c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("6d"));
872c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("5e"));
873c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("4f"));
874c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("3g"));
875c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("2h"));
876c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("1i"));
877c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
878c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  string16 formatted =
879c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      ReplaceStringPlaceholders(
880c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
881c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
882c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
883c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
884c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
885c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
886c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Test whether replacestringplaceholders works as expected when there
887c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // are fewer inputs than outputs.
888c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::vector<string16> subst;
889c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("9a"));
890c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("8b"));
891c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back(ASCIIToUTF16("7c"));
892c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
893c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  string16 formatted =
894c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      ReplaceStringPlaceholders(
895c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
896c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
897c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
898c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
899c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
900c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
901c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::vector<std::string> subst;
902c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back("9a");
903c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back("8b");
904c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back("7c");
905c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back("6d");
906c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back("5e");
907c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back("4f");
908c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back("3g");
909c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back("2h");
910c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  subst.push_back("1i");
911c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
912c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string formatted =
913c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      ReplaceStringPlaceholders(
914c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
915c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
916c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
917c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
918c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
9193345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickTEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
9203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  std::vector<std::string> subst;
9213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  subst.push_back("a");
9223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  subst.push_back("b");
9233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  subst.push_back("c");
9243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
9253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick            "$1 $$2 $$$3");
9263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
9273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
928c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, MatchPatternTest) {
9293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
9303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("www.google.com", "*"));
9313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
9323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
9333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
9343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
9353345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
9363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_FALSE(MatchPattern("", "*.*"));
9373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("", "*"));
9383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("", "?"));
9393345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("", ""));
9403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_FALSE(MatchPattern("Hello", ""));
9413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
942c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Stop after a certain recursion depth.
9433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
9443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
9453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Test UTF8 matching.
9463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
9473345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
9483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
9493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Invalid sequences should be handled as a single invalid character.
9503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
9513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // If the pattern has invalid characters, it shouldn't match anything.
9523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
9533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
9543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Test UTF16 character matching.
9553345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
9563345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                           UTF8ToUTF16("*.com")));
9573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
9583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                           UTF8ToUTF16("He??o\\*1*")));
959731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
960731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // This test verifies that consecutive wild cards are collapsed into 1
961731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
962731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // recursion depth).
963731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
964731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick                           UTF8ToUTF16("He********************************o")));
965c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
966c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
967c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, LcpyTest) {
968c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Test the normal case where we fit in our buffer.
969c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {
970c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    char dst[10];
971c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    wchar_t wdst[10];
972c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
973c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
974c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
975c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
976c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
977c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
978c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Test dst_size == 0, nothing should be written to |dst| and we should
979c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // have the equivalent of strlen(src).
980c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {
981c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    char dst[2] = {1, 2};
982c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    wchar_t wdst[2] = {1, 2};
983c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
984c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(1, dst[0]);
985c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(2, dst[1]);
986c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
987c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#if defined(WCHAR_T_IS_UNSIGNED)
988c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(1U, wdst[0]);
989c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(2U, wdst[1]);
990c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#else
991c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(1, wdst[0]);
992c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(2, wdst[1]);
993c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif
994c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
995c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
996c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Test the case were we _just_ competely fit including the null.
997c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {
998c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    char dst[8];
999c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    wchar_t wdst[8];
1000c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1001c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1002c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1003c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1004c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
1005c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
1006c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Test the case were we we are one smaller, so we can't fit the null.
1007c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {
1008c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    char dst[7];
1009c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    wchar_t wdst[7];
1010c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1011c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1012c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1013c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1014c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
1015c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
1016c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Test the case were we are just too small.
1017c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  {
1018c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    char dst[3];
1019c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    wchar_t wdst[3];
1020c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1021c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(0, memcmp(dst, "ab", 3));
1022c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1023c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1024c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
1025c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
1026c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
1027c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTEST(StringUtilTest, WprintfFormatPortabilityTest) {
1028c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  struct TestData {
1029c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const wchar_t* input;
1030c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bool portable;
1031c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } cases[] = {
1032c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%ls", true },
1033c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%s", false },
1034c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%S", false },
1035c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%lS", false },
1036c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"Hello, %s", false },
1037c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%lc", true },
1038c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%c", false },
1039c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%C", false },
1040c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%lC", false },
1041c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%ls %s", false },
1042c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%s %ls", false },
1043c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%s %ls %s", false },
1044c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%f", true },
1045c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%f %F", false },
1046c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%d %D", false },
1047c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%o %O", false },
1048c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%u %U", false },
1049c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%f %d %o %u", true },
1050c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"%-8d (%02.1f%)", true },
1051c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"% 10s", false },
1052c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"% 10ls", true }
1053c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
1054c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1055c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1056c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
1057c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
1058c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
1059c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(StringUtilTest, RemoveChars) {
1060c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const char* kRemoveChars = "-/+*";
1061c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string input = "A-+bc/d!*";
1062c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1063c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ("Abcd!", input);
1064c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1065c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // No characters match kRemoveChars.
1066c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1067c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ("Abcd!", input);
1068c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1069c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Empty string.
1070c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  input.clear();
1071c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1072c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(std::string(), input);
1073c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
1074c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1075c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(StringUtilTest, ContainsOnlyChars) {
1076c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Providing an empty list of characters should return false but for the empty
1077c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // string.
1078c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(ContainsOnlyChars("", ""));
1079c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1080c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1081c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1082c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1083c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1084c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1085c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1086c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
1087c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1088c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}  // namespace base
1089