// streaming_utf8_validator.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright 2014 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// This implementation doesn't use ICU. The ICU macros are oriented towards 6// character-at-a-time processing, whereas byte-at-a-time processing is easier 7// with streaming input. 8 9#include "base/i18n/streaming_utf8_validator.h" 10 11#include "base/i18n/utf8_validator_tables.h" 12#include "base/logging.h" 13 14namespace base { 15namespace { 16 17uint8 StateTableLookup(uint8 offset) { 18 // Skip the bounds check on non-debug builds so that it isn't necessary to set 19 // LOGGING_IS_OFFICIAL_BUILD just to do a performance test. 20 if (logging::DEBUG_MODE) 21 DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize); 22 return internal::kUtf8ValidatorTables[offset]; 23} 24 25} // namespace 26 27StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(const char* data, 28 size_t size) { 29 // Copy |state_| into a local variable so that the compiler doesn't have to be 30 // careful of aliasing. 31 uint8 state = state_; 32 for (const char* p = data; p != data + size; ++p) { 33 if ((*p & 0x80) == 0) { 34 if (state == 0) 35 continue; 36 state = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX; 37 break; 38 } 39 const uint8 shift_amount = StateTableLookup(state); 40 const uint8 shifted_char = (*p & 0x7F) >> shift_amount; 41 state = StateTableLookup(state + shifted_char + 1); 42 // State may be INVALID here, but this code is optimised for the case of 43 // valid UTF-8 and it is more efficient (by about 2%) to not attempt an 44 // early loop exit unless we hit an ASCII character. 45 } 46 state_ = state; 47 return state == 0 ? VALID_ENDPOINT 48 : state == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX 49 ? 
INVALID 50 : VALID_MIDPOINT; 51} 52 53void StreamingUtf8Validator::Reset() { 54 state_ = 0u; 55} 56 57bool StreamingUtf8Validator::Validate(const std::string& string) { 58 return StreamingUtf8Validator().AddBytes(string.data(), string.size()) == 59 VALID_ENDPOINT; 60} 61 62} // namespace base 63