streaming_utf8_validator.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// This implementation doesn't use ICU. The ICU macros are oriented towards
6// character-at-a-time processing, whereas byte-at-a-time processing is easier
7// with streaming input.
8
9#include "base/i18n/streaming_utf8_validator.h"
10
11#include "base/i18n/utf8_validator_tables.h"
12#include "base/logging.h"
13
14namespace base {
15namespace {
16
17uint8 StateTableLookup(uint8 offset) {
18  // Skip the bounds check on non-debug builds so that it isn't necessary to set
19  // LOGGING_IS_OFFICIAL_BUILD just to do a performance test.
20  if (logging::DEBUG_MODE)
21    DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize);
22  return internal::kUtf8ValidatorTables[offset];
23}
24
25}  // namespace
26
27StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(const char* data,
28                                                               size_t size) {
29  // Copy |state_| into a local variable so that the compiler doesn't have to be
30  // careful of aliasing.
31  uint8 state = state_;
32  for (const char* p = data; p != data + size; ++p) {
33    if ((*p & 0x80) == 0) {
34      if (state == 0)
35        continue;
36      state = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX;
37      break;
38    }
39    const uint8 shift_amount = StateTableLookup(state);
40    const uint8 shifted_char = (*p & 0x7F) >> shift_amount;
41    state = StateTableLookup(state + shifted_char + 1);
42    // State may be INVALID here, but this code is optimised for the case of
43    // valid UTF-8 and it is more efficient (by about 2%) to not attempt an
44    // early loop exit unless we hit an ASCII character.
45  }
46  state_ = state;
47  return state == 0 ? VALID_ENDPOINT
48      : state == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX
49      ? INVALID
50      : VALID_MIDPOINT;
51}
52
53void StreamingUtf8Validator::Reset() {
54  state_ = 0u;
55}
56
57bool StreamingUtf8Validator::Validate(const std::string& string) {
58  return StreamingUtf8Validator().AddBytes(string.data(), string.size()) ==
59         VALID_ENDPOINT;
60}
61
62}  // namespace base
63