// Copyright 2008 Google Inc. All Rights Reserved.
// Author: xpeng@google.com (Peter Peng)
3fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville
4fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#include <google/protobuf/stubs/common.h>
5fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#include <gtest/gtest.h>
6fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville
7fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillenamespace google {
8fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillenamespace protobuf {
9fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillenamespace internal {
10fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillenamespace {
11fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville
12fbaaef999ba563838ebd00874ed8a1c01fbf286dWink SavilleTEST(StructurallyValidTest, ValidUTF8String) {
13fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville  // On GCC, this string can be written as:
14fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville  //   "abcd 1234 - \u2014\u2013\u2212"
15fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville  // MSVC seems to interpret \u differently.
16d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville  string valid_str("abcd 1234 - \342\200\224\342\200\223\342\210\222 - xyz789");
17fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville  EXPECT_TRUE(IsStructurallyValidUTF8(valid_str.data(),
18fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville                                      valid_str.size()));
19d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville  // Additional check for pointer alignment
20d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville  for (int i = 1; i < 8; ++i) {
21d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville    EXPECT_TRUE(IsStructurallyValidUTF8(valid_str.data() + i,
22d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville                                        valid_str.size() - i));
23d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville  }
24fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville}
25fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville
26fbaaef999ba563838ebd00874ed8a1c01fbf286dWink SavilleTEST(StructurallyValidTest, InvalidUTF8String) {
27d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville  const string invalid_str("abcd\xA0\xB0\xA0\xB0\xA0\xB0 - xyz789");
28fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville  EXPECT_FALSE(IsStructurallyValidUTF8(invalid_str.data(),
29fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville                                       invalid_str.size()));
30d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville  // Additional check for pointer alignment
31d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville  for (int i = 1; i < 8; ++i) {
32d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville    EXPECT_FALSE(IsStructurallyValidUTF8(invalid_str.data() + i,
33d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville                                         invalid_str.size() - i));
34d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville  }
35fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville}
36fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville
37fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville}  // namespace
38fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville}  // namespace internal
39fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville}  // namespace protobuf
40fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville}  // namespace google
41