15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Protocol Buffers - Google's data interchange format
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2008 Google Inc.  All rights reserved.
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// http://code.google.com/p/protobuf/
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Redistribution and use in source and binary forms, with or without
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// modification, are permitted provided that the following conditions are
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// met:
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     * Redistributions of source code must retain the above copyright
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// notice, this list of conditions and the following disclaimer.
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     * Redistributions in binary form must reproduce the above
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// copyright notice, this list of conditions and the following disclaimer
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// in the documentation and/or other materials provided with the
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// distribution.
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     * Neither the name of Google Inc. nor the names of its
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// contributors may be used to endorse or promote products derived from
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// this software without specific prior written permission.
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Author: kenton@google.com (Kenton Varda)
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  Based on original Protocol Buffers design by
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  Sanjay Ghemawat, Jeff Dean, and others.
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <limits.h>
36ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch#include <math.h>
37ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
38ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch#include <vector>
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <google/protobuf/io/tokenizer.h>
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <google/protobuf/io/zero_copy_stream_impl.h>
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <google/protobuf/stubs/common.h>
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <google/protobuf/stubs/strutil.h>
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <google/protobuf/stubs/substitute.h>
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <google/protobuf/testing/googletest.h>
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <gtest/gtest.h>
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace google {
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace protobuf {
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace io {
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ===================================================================
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Data-Driven Test Infrastructure
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TODO(kenton):  This is copied from coded_stream_unittest.  This is
//   temporary until these features are integrated into gTest itself.
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TEST_1D and TEST_2D are macros I'd eventually like to see added to
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// gTest.  These macros can be used to declare tests which should be
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// run multiple times, once for each item in some input array.  TEST_1D
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// tests all cases in a single input array.  TEST_2D tests all
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// combinations of cases from two arrays.  The arrays must be statically
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// defined such that the GOOGLE_ARRAYSIZE() macro works on them.  Example:
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// int kCases[] = {1, 2, 3, 4}
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TEST_1D(MyFixture, MyTest, kCases) {
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   EXPECT_GT(kCases_case, 0);
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// }
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This test iterates through the numbers 1, 2, 3, and 4 and tests that
// they are all greater than zero.  In case of failure, the exact case
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// which failed will be printed.  The case type must be printable using
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ostream::operator<<.
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Declares a data-driven test over a single array.  The macro defines a
// fixture subclass named FIXTURE_NAME_DD with a templated DoSingleCase()
// method, plus a TEST_F that calls DoSingleCase() once for every element of
// CASES.  The brace-enclosed body written after the macro invocation becomes
// the body of DoSingleCase(); inside it the current element is available as
// <CASES>_case.  Each call runs under a SCOPED_TRACE naming the array, the
// element index and the element itself, so the element type must be
// streamable.
#define TEST_1D(FIXTURE, NAME, CASES)                                      \
  class FIXTURE##_##NAME##_DD : public FIXTURE {                           \
   protected:                                                              \
    template <typename CaseType>                                           \
    void DoSingleCase(const CaseType& CASES##_case);                       \
  };                                                                       \
                                                                           \
  TEST_F(FIXTURE##_##NAME##_DD, NAME) {                                    \
    /* Unsigned index: avoids a signed/unsigned comparison against the */  \
    /* sizeof-derived element count.                                   */  \
    for (size_t i = 0; i < GOOGLE_ARRAYSIZE(CASES); ++i) {                 \
      SCOPED_TRACE(testing::Message()                                      \
        << #CASES " case #" << i << ": " << CASES[i]);                     \
      DoSingleCase(CASES[i]);                                              \
    }                                                                      \
  }                                                                        \
                                                                           \
  template <typename CaseType>                                             \
  void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Declares a data-driven test over the cross product of two arrays.  The
// macro defines a fixture subclass named FIXTURE_NAME_DD with a templated
// DoSingleCase() method, plus a TEST_F that calls DoSingleCase() once for
// every (CASES1 element, CASES2 element) pair, iterating CASES2 in the inner
// loop.  The brace-enclosed body written after the macro invocation becomes
// the body of DoSingleCase(); inside it the current elements are available
// as <CASES1>_case and <CASES2>_case.  Each call runs under a SCOPED_TRACE
// naming both arrays, indices and elements, so both element types must be
// streamable.
#define TEST_2D(FIXTURE, NAME, CASES1, CASES2)                             \
  class FIXTURE##_##NAME##_DD : public FIXTURE {                           \
   protected:                                                              \
    template <typename CaseType1, typename CaseType2>                      \
    void DoSingleCase(const CaseType1& CASES1##_case,                      \
                      const CaseType2& CASES2##_case);                     \
  };                                                                       \
                                                                           \
  TEST_F(FIXTURE##_##NAME##_DD, NAME) {                                    \
    /* Unsigned indices: avoid signed/unsigned comparisons against the */  \
    /* sizeof-derived element counts.                                  */  \
    for (size_t i = 0; i < GOOGLE_ARRAYSIZE(CASES1); ++i) {                \
      for (size_t j = 0; j < GOOGLE_ARRAYSIZE(CASES2); ++j) {              \
        SCOPED_TRACE(testing::Message()                                    \
          << #CASES1 " case #" << i << ": " << CASES1[i] << ", "           \
          << #CASES2 " case #" << j << ": " << CASES2[j]);                 \
        DoSingleCase(CASES1[i], CASES2[j]);                                \
      }                                                                    \
    }                                                                      \
  }                                                                        \
                                                                           \
  template <typename CaseType1, typename CaseType2>                        \
  void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \
                                           const CaseType2& CASES2##_case)
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -------------------------------------------------------------------
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// An input stream that is basically like an ArrayInputStream but sometimes
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// returns empty buffers, just to throw us off.
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class TestInputStream : public ZeroCopyInputStream {
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestInputStream(const void* data, int size, int block_size)
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : array_stream_(data, size, block_size), counter_(0) {}
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~TestInputStream() {}
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // implements ZeroCopyInputStream ----------------------------------
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Next(const void** data, int* size) {
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We'll return empty buffers starting with the first buffer, and every
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // 3 and 5 buffers after that.
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (counter_ % 3 == 0 || counter_ % 5 == 0) {
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *data = NULL;
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *size = 0;
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++counter_;
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++counter_;
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return array_stream_.Next(data, size);
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void BackUp(int count)  { return array_stream_.BackUp(count); }
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Skip(int count)    { return array_stream_.Skip(count);   }
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int64 ByteCount() const { return array_stream_.ByteCount();   }
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ArrayInputStream array_stream_;
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int counter_;
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -------------------------------------------------------------------
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// An error collector which simply concatenates all its errors into a big
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// block of text which can be checked.
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class TestErrorCollector : public ErrorCollector {
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestErrorCollector() {}
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~TestErrorCollector() {}
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string text_;
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // implements ErrorCollector ---------------------------------------
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AddError(int line, int column, const string& message) {
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    strings::SubstituteAndAppend(&text_, "$0:$1: $2\n",
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 line, column, message);
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -------------------------------------------------------------------
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Block sizes over which each operation is exercised.  Running every test
// with several sizes ensures we cover reads that straddle buffer boundaries
// as well as reads that fit inside a single buffer.  It is a brute-force
// approach, but simple to write and to follow.
const int kBlockSizes[] = {
  1, 2, 3, 5, 7, 13, 32, 1024
};
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class TokenizerTest : public testing::Test {
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected:
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // For easy testing.
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint64 ParseInteger(const string& text) {
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint64 result;
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_TRUE(Tokenizer::ParseInteger(text, kuint64max, &result));
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return result;
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ===================================================================
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// These tests cause gcc 3.3.5 (and earlier?) to give the cryptic error:
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// In each test case, the entire input text should parse as a single token
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// of the given type.
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct SimpleTokenCase {
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string input;
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::TokenType type;
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline ostream& operator<<(ostream& out,
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           const SimpleTokenCase& test_case) {
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return out << CEscape(test_case.input);
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SimpleTokenCase kSimpleTokenCases[] = {
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test identifiers.
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "hello",       Tokenizer::TYPE_IDENTIFIER },
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test integers.
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "123",         Tokenizer::TYPE_INTEGER },
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "0xab6",       Tokenizer::TYPE_INTEGER },
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "0XAB6",       Tokenizer::TYPE_INTEGER },
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "0X1234567",   Tokenizer::TYPE_INTEGER },
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "0x89abcdef",  Tokenizer::TYPE_INTEGER },
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "0x89ABCDEF",  Tokenizer::TYPE_INTEGER },
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "01234567",    Tokenizer::TYPE_INTEGER },
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test floats.
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "123.45",      Tokenizer::TYPE_FLOAT },
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1.",          Tokenizer::TYPE_FLOAT },
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1e3",         Tokenizer::TYPE_FLOAT },
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1E3",         Tokenizer::TYPE_FLOAT },
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1e-3",        Tokenizer::TYPE_FLOAT },
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1e+3",        Tokenizer::TYPE_FLOAT },
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1.e3",        Tokenizer::TYPE_FLOAT },
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1.2e3",       Tokenizer::TYPE_FLOAT },
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { ".1",          Tokenizer::TYPE_FLOAT },
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { ".1e3",        Tokenizer::TYPE_FLOAT },
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { ".1e-3",       Tokenizer::TYPE_FLOAT },
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { ".1e+3",       Tokenizer::TYPE_FLOAT },
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test strings.
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "'hello'",     Tokenizer::TYPE_STRING },
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\"foo\"",     Tokenizer::TYPE_STRING },
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "'a\"b'",      Tokenizer::TYPE_STRING },
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\"a'b\"",     Tokenizer::TYPE_STRING },
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "'a\\'b'",     Tokenizer::TYPE_STRING },
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\"a\\\"b\"",  Tokenizer::TYPE_STRING },
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "'\\xf'",      Tokenizer::TYPE_STRING },
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "'\\0'",       Tokenizer::TYPE_STRING },
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test symbols.
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "+",           Tokenizer::TYPE_SYMBOL },
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { ".",           Tokenizer::TYPE_SYMBOL },
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set up the tokenizer.
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestInputStream input(kSimpleTokenCases_case.input.data(),
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        kSimpleTokenCases_case.input.size(),
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        kBlockSizes_case);
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestErrorCollector error_collector;
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer tokenizer(&input, &error_collector);
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Before Next() is called, the initial token should always be TYPE_START.
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type);
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("", tokenizer.current().text);
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, tokenizer.current().line);
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, tokenizer.current().column);
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, tokenizer.current().end_column);
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Parse the token.
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(tokenizer.Next());
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check that it has the right type.
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(kSimpleTokenCases_case.type, tokenizer.current().type);
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check that it contains the complete input text.
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(kSimpleTokenCases_case.input, tokenizer.current().text);
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check that it is located at the beginning of the input
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, tokenizer.current().line);
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, tokenizer.current().column);
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(kSimpleTokenCases_case.input.size(),
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            tokenizer.current().end_column);
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There should be no more input.
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(tokenizer.Next());
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // After Next() returns false, the token should have type TYPE_END.
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(Tokenizer::TYPE_END, tokenizer.current().type);
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("", tokenizer.current().text);
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, tokenizer.current().line);
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(kSimpleTokenCases_case.input.size(), tokenizer.current().column);
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(kSimpleTokenCases_case.input.size(),
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            tokenizer.current().end_column);
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There should be no errors.
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(error_collector.text_.empty());
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_1D(TokenizerTest, FloatSuffix, kBlockSizes) {
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test the "allow_f_after_float" option.
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set up the tokenizer.
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* text = "1f 2.5f 6e3f 7F";
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestInputStream input(text, strlen(text), kBlockSizes_case);
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestErrorCollector error_collector;
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer tokenizer(&input, &error_collector);
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  tokenizer.set_allow_f_after_float(true);
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Advance through tokens and check that they are parsed as expected.
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(tokenizer.Next());
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(tokenizer.current().text, "1f");
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(tokenizer.Next());
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(tokenizer.current().text, "2.5f");
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(tokenizer.Next());
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(tokenizer.current().text, "6e3f");
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(tokenizer.Next());
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(tokenizer.current().text, "7F");
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There should be no more input.
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(tokenizer.Next());
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There should be no errors.
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(error_collector.text_.empty());
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -------------------------------------------------------------------
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// In each case, the input is parsed to produce a list of tokens.  The
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// last token in "output" must have type TYPE_END.
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct MultiTokenCase {
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string input;
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::Token output[10];  // The compiler wants a constant array
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                // size for initialization to work.  There
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                // is no reason this can't be increased if
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                // needed.
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline ostream& operator<<(ostream& out,
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           const MultiTokenCase& test_case) {
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return out << CEscape(test_case.input);
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)MultiTokenCase kMultiTokenCases[] = {
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test empty input.
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "", {
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END       , ""     , 0,  0 },
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test all token types at the same time.
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "foo 1 1.2 + 'bar'", {
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "foo"  , 0,  0,  3 },
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_INTEGER   , "1"    , 0,  4,  5 },
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_FLOAT     , "1.2"  , 0,  6,  9 },
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_SYMBOL    , "+"    , 0, 10, 11 },
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_STRING    , "'bar'", 0, 12, 17 },
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END       , ""     , 0, 17, 17 },
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test that consecutive symbols are parsed as separate tokens.
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "!@+%", {
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_SYMBOL    , "!"    , 0, 0, 1 },
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_SYMBOL    , "@"    , 0, 1, 2 },
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_SYMBOL    , "+"    , 0, 2, 3 },
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_SYMBOL    , "%"    , 0, 3, 4 },
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END       , ""     , 0, 4, 4 },
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test that newlines affect line numbers correctly.
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "foo bar\nrab oof", {
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0, 3 },
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "bar", 0,  4, 7 },
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "rab", 1,  0, 3 },
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "oof", 1,  4, 7 },
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END       , ""   , 1,  7, 7 },
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test that tabs affect column numbers correctly.
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "foo\tbar  \tbaz", {
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0,  3 },
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "bar", 0,  8, 11 },
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16, 19 },
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END       , ""   , 0, 19, 19 },
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test that tabs in string literals affect column numbers correctly.
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\"foo\tbar\" baz", {
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_STRING    , "\"foo\tbar\"", 0,  0, 12 },
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "baz"         , 0, 13, 16 },
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END       , ""            , 0, 16, 16 },
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test that line comments are ignored.
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "foo // This is a comment\n"
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "bar // This is another comment", {
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0,  3 },
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "bar", 1,  0,  3 },
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END       , ""   , 1, 30, 30 },
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test that block comments are ignored.
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "foo /* This is a block comment */ bar", {
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0,  3 },
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34, 37 },
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END       , ""   , 0, 37, 37 },
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test that sh-style comments are not ignored by default.
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "foo # bar\n"
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "baz", {
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_SYMBOL    , "#"  , 0, 4, 5 },
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6, 9 },
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0, 3 },
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END       , ""   , 1, 3, 3 },
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Bytes with the high-order bit set should not be seen as control characters.
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\300", {
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_SYMBOL, "\300", 0, 0, 1 },
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END   , ""    , 0, 1, 1 },
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test all whitespace chars
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "foo\n\t\r\v\fbar", {
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0,  3 },
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 11, 14 },
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    { Tokenizer::TYPE_END       , ""   , 1, 14, 14 },
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }},
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set up the tokenizer.
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestInputStream input(kMultiTokenCases_case.input.data(),
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        kMultiTokenCases_case.input.size(),
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        kBlockSizes_case);
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestErrorCollector error_collector;
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer tokenizer(&input, &error_collector);
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Before Next() is called, the initial token should always be TYPE_START.
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type);
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("", tokenizer.current().text);
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, tokenizer.current().line);
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, tokenizer.current().column);
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, tokenizer.current().end_column);
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Loop through all expected tokens.
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int i = 0;
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::Token token;
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  do {
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    token = kMultiTokenCases_case.output[i++];
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SCOPED_TRACE(testing::Message() << "Token #" << i << ": " << token.text);
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Tokenizer::Token previous = tokenizer.current();
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Next() should only return false when it hits the end token.
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (token.type != Tokenizer::TYPE_END) {
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ASSERT_TRUE(tokenizer.Next());
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ASSERT_FALSE(tokenizer.Next());
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Check that the previous token is set correctly.
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(previous.type, tokenizer.previous().type);
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(previous.text, tokenizer.previous().text);
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(previous.line, tokenizer.previous().line);
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(previous.column, tokenizer.previous().column);
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(previous.end_column, tokenizer.previous().end_column);
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Check that the token matches the expected one.
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(token.type, tokenizer.current().type);
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(token.text, tokenizer.current().text);
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(token.line, tokenizer.current().line);
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(token.column, tokenizer.current().column);
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(token.end_column, tokenizer.current().end_column);
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } while (token.type != Tokenizer::TYPE_END);
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There should be no errors.
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(error_collector.text_.empty());
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error:
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) {
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test the "comment_style" option.
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* text = "foo # bar\n"
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     "baz // qux\n"
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     "corge /* grault */\n"
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     "garply";
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* const kTokens[] = {"foo",  // "# bar" is ignored
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 "baz", "/", "/", "qux",
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 "corge", "/", "*", "grault", "*", "/",
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 "garply"};
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set up the tokenizer.
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestInputStream input(text, strlen(text), kBlockSizes_case);
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestErrorCollector error_collector;
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer tokenizer(&input, &error_collector);
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  tokenizer.set_comment_style(Tokenizer::SH_COMMENT_STYLE);
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Advance through tokens and check that they are parsed as expected.
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < GOOGLE_ARRAYSIZE(kTokens); i++) {
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_TRUE(tokenizer.Next());
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(tokenizer.current().text, kTokens[i]);
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There should be no more input.
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(tokenizer.Next());
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There should be no errors.
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(error_collector.text_.empty());
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -------------------------------------------------------------------
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
518ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch// In each case, the input is expected to have two tokens named "prev" and
519ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch// "next" with comments in between.
520ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdochstruct DocCommentCase {
521ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  string input;
522ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
523ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  const char* prev_trailing_comments;
524ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  const char* detached_comments[10];
525ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  const char* next_leading_comments;
526ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch};
527ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
528ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdochinline ostream& operator<<(ostream& out,
529ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                           const DocCommentCase& test_case) {
530ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  return out << CEscape(test_case.input);
531ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch}
532ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
533ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben MurdochDocCommentCase kDocCommentCases[] = {
534ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  {
535ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch    "prev next",
536ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
537ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch    "",
538ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch    {},
539ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch    ""
540ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch      },
541ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
542ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch        {
543ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch      "prev /* ignored */ next",
544ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
545ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch      "",
546ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch      {},
547ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch      ""
548ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch        },
549ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
550ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch          {
551ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch        "prev // trailing comment\n"
552ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch            "next",
553ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
554ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch            " trailing comment\n",
555ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch            {},
556ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch            ""
557ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch          },
558ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
559ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch            {
560ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch          "prev\n"
561ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              "// leading comment\n"
562ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              "// line 2\n"
563ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              "next",
564ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
565ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              "",
566ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              {},
567ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              " leading comment\n"
568ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              " line 2\n"
569ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch            },
570ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
571ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              {
572ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch            "prev\n"
573ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                "// trailing comment\n"
574ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                "// line 2\n"
575ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                "\n"
576ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                "next",
577ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
578ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                " trailing comment\n"
579ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                " line 2\n",
580ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                {},
581ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                ""
582ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              },
583ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
584ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                {
585ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              "prev // trailing comment\n"
586ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                  "// leading comment\n"
587ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                  "// line 2\n"
588ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                  "next",
589ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
590ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                  " trailing comment\n",
591ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                  {},
592ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                  " leading comment\n"
593ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                  " line 2\n"
594ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                },
595ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
596ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                  {
597ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                "prev /* trailing block comment */\n"
598ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    "/* leading block comment\n"
599ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    " * line 2\n"
600ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    " * line 3 */"
601ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    "next",
602ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
603ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    " trailing block comment ",
604ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    {},
605ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    " leading block comment\n"
606ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    " line 2\n"
607ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    " line 3 "
608ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                  },
609ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
610ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    {
611ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                  "prev\n"
612ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      "/* trailing block comment\n"
613ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " * line 2\n"
614ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " * line 3\n"
615ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " */\n"
616ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      "/* leading block comment\n"
617ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " * line 2\n"
618ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " * line 3 */"
619ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      "next",
620ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
621ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " trailing block comment\n"
622ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " line 2\n"
623ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " line 3\n",
624ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      {},
625ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " leading block comment\n"
626ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " line 2\n"
627ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " line 3 "
628ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    },
629ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
630ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      {
631ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                    "prev\n"
632ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        "// trailing comment\n"
633ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        "\n"
634ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        "// detached comment\n"
635ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        "// line 2\n"
636ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        "\n"
637ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        "// second detached comment\n"
638ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        "/* third detached comment\n"
639ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        " * line 2 */\n"
640ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        "// leading comment\n"
641ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        "next",
642ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
643ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        " trailing comment\n",
644ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        {
645ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                      " detached comment\n"
646ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                          " line 2\n",
647ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                          " second detached comment\n",
648ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                          " third detached comment\n"
649ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                          " line 2 "
650ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        },
651ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                          " leading comment\n"
652ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        },
653ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
654ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                          {
655ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        "prev /**/\n"
656ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                            "\n"
657ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                            "// detached comment\n"
658ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                            "\n"
659ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                            "// leading comment\n"
660ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                            "next",
661ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
662ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                            "",
663ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                            {
664ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                          " detached comment\n"
665ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                            },
666ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                              " leading comment\n"
667ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                            },
668ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
669ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                              {
670ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                            "prev /**/\n"
671ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                                "// leading comment\n"
672ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                                "next",
673ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
674ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                                "",
675ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                                {},
676ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                                " leading comment\n"
677ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                              },
678ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                              };
679ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
680ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben MurdochTEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) {
681ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // Set up the tokenizer.
682ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  TestInputStream input(kDocCommentCases_case.input.data(),
683ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        kDocCommentCases_case.input.size(),
684ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        kBlockSizes_case);
685ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  TestErrorCollector error_collector;
686ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  Tokenizer tokenizer(&input, &error_collector);
687ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
688ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // Set up a second tokenizer where we'll pass all NULLs to NextWithComments().
689ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  TestInputStream input2(kDocCommentCases_case.input.data(),
690ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        kDocCommentCases_case.input.size(),
691ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                        kBlockSizes_case);
692ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  Tokenizer tokenizer2(&input2, &error_collector);
693ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
694ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  tokenizer.Next();
695ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  tokenizer2.Next();
696ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
697ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ("prev", tokenizer.current().text);
698ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ("prev", tokenizer2.current().text);
699ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
700ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  string prev_trailing_comments;
701ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  vector<string> detached_comments;
702ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  string next_leading_comments;
703ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  tokenizer.NextWithComments(&prev_trailing_comments, &detached_comments,
704ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch                             &next_leading_comments);
705ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  tokenizer2.NextWithComments(NULL, NULL, NULL);
706ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ("next", tokenizer.current().text);
707ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ("next", tokenizer2.current().text);
708ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
709ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ(kDocCommentCases_case.prev_trailing_comments,
710ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch            prev_trailing_comments);
711ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
712ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  for (int i = 0; i < detached_comments.size(); i++) {
713ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch    ASSERT_LT(i, GOOGLE_ARRAYSIZE(kDocCommentCases));
714ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch    ASSERT_TRUE(kDocCommentCases_case.detached_comments[i] != NULL);
715ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch    EXPECT_EQ(kDocCommentCases_case.detached_comments[i],
716ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch              detached_comments[i]);
717ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  }
718ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
719ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // Verify that we matched all the detached comments.
720ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ(NULL,
721ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch      kDocCommentCases_case.detached_comments[detached_comments.size()]);
722ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
723ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ(kDocCommentCases_case.next_leading_comments,
724ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch            next_leading_comments);
725ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch}
726ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
727ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch// -------------------------------------------------------------------
728ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Test parse helpers.  It's not really worth setting up a full data-driven
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// test here.
7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_F(TokenizerTest, ParseInteger) {
7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, ParseInteger("0"));
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(123, ParseInteger("123"));
7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0xabcdef12u, ParseInteger("0xabcdef12"));
7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0xabcdef12u, ParseInteger("0xABCDEF12"));
7365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(kuint64max, ParseInteger("0xFFFFFFFFFFFFFFFF"));
7375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(01234567, ParseInteger("01234567"));
7385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0X123, ParseInteger("0X123"));
7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test invalid integers that may still be tokenized as integers.
7415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0, ParseInteger("0x"));
7425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint64 i;
7443551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#ifdef PROTOBUF_HASDEATH_TEST  // death tests do not work on Windows yet
7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test invalid integers that will never be tokenized as integers.
7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("zxy", kuint64max, &i),
7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "passed text that could not have been tokenized as an integer");
7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("1.2", kuint64max, &i),
7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "passed text that could not have been tokenized as an integer");
7505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("08", kuint64max, &i),
7515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "passed text that could not have been tokenized as an integer");
7525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("0xg", kuint64max, &i),
7535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "passed text that could not have been tokenized as an integer");
7545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("-1", kuint64max, &i),
7555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "passed text that could not have been tokenized as an integer");
7563551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#endif  // PROTOBUF_HASDEATH_TEST
7575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test overflows.
7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i));
7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(Tokenizer::ParseInteger("1", 0, &i));
7615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE (Tokenizer::ParseInteger("1", 1, &i));
7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE (Tokenizer::ParseInteger("12345", 12345, &i));
7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(Tokenizer::ParseInteger("12346", 12345, &i));
7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE (Tokenizer::ParseInteger("0xFFFFFFFFFFFFFFFF" , kuint64max, &i));
7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(Tokenizer::ParseInteger("0x10000000000000000", kuint64max, &i));
7665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_F(TokenizerTest, ParseFloat) {
7695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1    , Tokenizer::ParseFloat("1."));
7705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1e3  , Tokenizer::ParseFloat("1e3"));
7715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1e3  , Tokenizer::ParseFloat("1E3"));
7725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.5e3, Tokenizer::ParseFloat("1.5e3"));
7735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(.1   , Tokenizer::ParseFloat(".1"));
7745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(.25  , Tokenizer::ParseFloat(".25"));
7755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(.1e3 , Tokenizer::ParseFloat(".1e3"));
7765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(.25e3, Tokenizer::ParseFloat(".25e3"));
7775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(.1e+3, Tokenizer::ParseFloat(".1e+3"));
7785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(.1e-3, Tokenizer::ParseFloat(".1e-3"));
7795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(5    , Tokenizer::ParseFloat("5"));
7805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(6e-12, Tokenizer::ParseFloat("6e-12"));
7815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.2  , Tokenizer::ParseFloat("1.2"));
7825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.e2 , Tokenizer::ParseFloat("1.e2"));
7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test invalid integers that may still be tokenized as integers.
7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e"));
7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e-"));
7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.e"));
7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test 'f' suffix.
7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1f"));
7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.0f"));
7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1F"));
7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // These should parse successfully even though they are out of range.
7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Overflows become infinity and underflows become zero.
7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(     0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999"));
7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999"));
7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7993551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#ifdef PROTOBUF_HASDEATH_TEST  // death tests do not work on Windows yet
8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test invalid integers that will never be tokenized as integers.
8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"),
8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "passed text that could not have been tokenized as a float");
8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("1-e0"),
8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "passed text that could not have been tokenized as a float");
8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"),
8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "passed text that could not have been tokenized as a float");
8073551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#endif  // PROTOBUF_HASDEATH_TEST
8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_F(TokenizerTest, ParseString) {
8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string output;
8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::ParseString("'hello'", &output);
8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("hello", output);
8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::ParseString("\"blah\\nblah2\"", &output);
8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("blah\nblah2", output);
8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::ParseString("'\\1x\\1\\123\\739\\52\\334n\\3'", &output);
8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("\1x\1\123\739\52\334n\3", output);
8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::ParseString("'\\x20\\x4'", &output);
8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("\x20\x4", output);
8205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test invalid strings that may still be tokenized as strings.
8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::ParseString("\"\\a\\l\\v\\t", &output);  // \l is invalid
8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("\a?\v\t", output);
8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::ParseString("'", &output);
8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("", output);
8265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::ParseString("'\\", &output);
8275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("\\", output);
8285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
829ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // Experiment with Unicode escapes. Here are one-, two- and three-byte Unicode
830ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // characters.
831ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\U00024b62XX'", &output);
832ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ("$¢€��XX", output);
833ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // Same thing encoded using UTF16.
834ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\ud852\\udf62XX'", &output);
835ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ("$¢€��XX", output);
836ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // Here's some broken UTF16; there's a head surrogate with no tail surrogate.
837ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // We just output this as if it were UTF8; it's not a defined code point, but
838ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // it has a defined encoding.
839ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  Tokenizer::ParseString("'\\ud852XX'", &output);
840ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ("\xed\xa1\x92XX", output);
841ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // Malformed escape: Demons may fly out of the nose.
842ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  Tokenizer::ParseString("\\u0", &output);
843ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ("u0", output);
844ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
8455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test invalid strings that will never be tokenized as strings.
8463551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#ifdef PROTOBUF_HASDEATH_TEST  // death tests do not work on Windows yet
8475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output),
8485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "passed text that could not have been tokenized as a string");
8493551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#endif  // PROTOBUF_HASDEATH_TEST
8505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_F(TokenizerTest, ParseStringAppend) {
8535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check that ParseString and ParseStringAppend differ.
8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string output("stuff+");
8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::ParseStringAppend("'hello'", &output);
8565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("stuff+hello", output);
8575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer::ParseString("'hello'", &output);
8585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ("hello", output);
8595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -------------------------------------------------------------------
8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Each case parses some input text, ignoring the tokens produced, and
8645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// checks that the error output matches what is expected.
8655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct ErrorCase {
8665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string input;
8675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool recoverable;  // True if the tokenizer should be able to recover and
8685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     // parse more tokens after seeing this error.  Cases
8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     // for which this is true must end with "foo" as
8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     // the last token, which the test will check for.
8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* errors;
8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline ostream& operator<<(ostream& out,
8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           const ErrorCase& test_case) {
8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return out << CEscape(test_case.input);
8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ErrorCase kErrorCases[] = {
8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // String errors.
8815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "'\\l' foo", true,
8825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:2: Invalid escape sequence in string literal.\n" },
8835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "'\\x' foo", true,
8845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:3: Expected hex digits for escape sequence.\n" },
8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "'foo", false,
8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:4: String literals cannot cross line boundaries.\n" },
8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "'bar\nfoo", true,
8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:4: String literals cannot cross line boundaries.\n" },
889ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  { "'\\u01' foo", true,
890ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch    "0:5: Expected four hex digits for \\u escape sequence.\n" },
891ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  { "'\\u01' foo", true,
892ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch    "0:5: Expected four hex digits for \\u escape sequence.\n" },
893ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  { "'\\uXYZ' foo", true,
894ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch    "0:3: Expected four hex digits for \\u escape sequence.\n" },
8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Integer errors.
8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "123foo", true,
8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:3: Need space between number and identifier.\n" },
8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Hex/octal errors.
9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "0x foo", true,
9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:2: \"0x\" must be followed by hex digits.\n" },
9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "0541823 foo", true,
9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:4: Numbers starting with leading zero must be in octal.\n" },
9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "0x123z foo", true,
9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:5: Need space between number and identifier.\n" },
9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "0x123.4 foo", true,
9085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:5: Hex and octal numbers must be integers.\n" },
9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "0123.4 foo", true,
9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:4: Hex and octal numbers must be integers.\n" },
9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Float errors.
9135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1e foo", true,
9145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:2: \"e\" must be followed by exponent.\n" },
9155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1e- foo", true,
9165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:3: \"e\" must be followed by exponent.\n" },
9175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1.2.3 foo", true,
9185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:3: Already saw decimal point or exponent; can't have another one.\n" },
9195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1e2.3 foo", true,
9205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:3: Already saw decimal point or exponent; can't have another one.\n" },
9215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "a.1 foo", true,
9225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:1: Need space between identifier and decimal point.\n" },
9235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // allow_f_after_float not enabled, so this should be an error.
9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "1.0f foo", true,
9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:3: Need space between number and identifier.\n" },
9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Block comment errors.
9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "/*", false,
9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:2: End-of-file inside block comment.\n"
9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:0:   Comment started here.\n"},
9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "/*/*/ foo", true,
9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:3: \"/*\" inside block comment.  Block comments cannot be nested.\n"},
9335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Control characters.  Multiple consecutive control characters should only
9355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // produce one error.
9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\b foo", true,
9375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:0: Invalid control characters encountered in text.\n" },
9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\b\b foo", true,
9395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:0: Invalid control characters encountered in text.\n" },
9405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check that control characters at end of input don't result in an
9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // infinite loop.
9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\b", false,
9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:0: Invalid control characters encountered in text.\n" },
9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check recovery from '\0'.  We have to explicitly specify the length of
9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // these strings because otherwise the string constructor will just call
9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // strlen() which will see the first '\0' and think that is the end of the
9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // string.
9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { string("\0foo", 4), true,
9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:0: Invalid control characters encountered in text.\n" },
9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { string("\0\0foo", 5), true,
9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "0:0: Invalid control characters encountered in text.\n" },
9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set up the tokenizer.
9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestInputStream input(kErrorCases_case.input.data(),
9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        kErrorCases_case.input.size(),
9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        kBlockSizes_case);
9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestErrorCollector error_collector;
9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tokenizer tokenizer(&input, &error_collector);
9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Ignore all input, except remember if the last token was "foo".
9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool last_was_foo = false;
9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (tokenizer.Next()) {
9675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    last_was_foo = tokenizer.current().text == "foo";
9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check that the errors match what was expected.
971ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  EXPECT_EQ(kErrorCases_case.errors, error_collector.text_);
9725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If the error was recoverable, make sure we saw "foo" after it.
9745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (kErrorCases_case.recoverable) {
9755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_TRUE(last_was_foo);
9765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -------------------------------------------------------------------
9805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_1D(TokenizerTest, BackUpOnDestruction, kBlockSizes) {
9825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string text = "foo bar";
9835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestInputStream input(text.data(), text.size(), kBlockSizes_case);
9845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Create a tokenizer, read one token, then destroy it.
9865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  {
9875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    TestErrorCollector error_collector;
9885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Tokenizer tokenizer(&input, &error_collector);
9895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tokenizer.Next();
9915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Only "foo" should have been read.
9945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(strlen("foo"), input.ByteCount());
9955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
997ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
9985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
9995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace io
10005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace protobuf
10015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace google
1002