// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 285ec4892aef9cca42940d7d92302abf674365f6b7ager@chromium.org#ifndef V8_UNICODE_H_ 295ec4892aef9cca42940d7d92302abf674365f6b7ager@chromium.org#define V8_UNICODE_H_ 3043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 3143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen#include <sys/types.h> 32a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org#include <globals.h> 3343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen/** 3443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen * \file 3543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen * Definitions and convenience functions for working with unicode. 3643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen */ 3743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 3843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansennamespace unibrow { 3943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 4043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansentypedef unsigned int uchar; 4143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansentypedef unsigned char byte; 4243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 4343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen/** 4443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen * The max length of the result of converting the case of a single 4543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen * character. 
4643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen */ 471b3afd1cab9087ca3c4e585d3da77d374d65c082mstarzinger@chromium.orgconst int kMaxMappingSize = 4; 4843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 4943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansentemplate <class T, int size = 256> 5043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenclass Predicate { 5143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen public: 5243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen inline Predicate() { } 5343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen inline bool get(uchar c); 5443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen private: 5543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen friend class Test; 5643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen bool CalculateValue(uchar c); 5743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen struct CacheEntry { 5843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen inline CacheEntry() : code_point_(0), value_(0) { } 5943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen inline CacheEntry(uchar code_point, bool value) 6043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen : code_point_(code_point), 6143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen value_(value) { } 6243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen uchar code_point_ : 21; 6343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen bool value_ : 1; 6443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen }; 6543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static const int kSize = size; 6643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static const int kMask = kSize - 1; 6743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen CacheEntry entries_[kSize]; 
6843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 6943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 7043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen// A cache used in case conversion. It caches the value for characters 7143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen// that either have no mapping or map to a single character independent 7243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen// of context. Characters that map to more than one character or that 7343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen// map differently depending on context are always looked up. 7443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansentemplate <class T, int size = 256> 7543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenclass Mapping { 7643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen public: 7743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen inline Mapping() { } 7843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen inline int get(uchar c, uchar n, uchar* result); 7943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen private: 8043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen friend class Test; 8143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen int CalculateValue(uchar c, uchar n, uchar* result); 8243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen struct CacheEntry { 83a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org inline CacheEntry() : code_point_(kNoChar), offset_(0) { } 8443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen inline CacheEntry(uchar code_point, signed offset) 8543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen : code_point_(code_point), 8643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen offset_(offset) { } 87a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org uchar 
code_point_; 88a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org signed offset_; 89a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org static const int kNoChar = (1 << 21) - 1; 9043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen }; 9143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static const int kSize = size; 9243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static const int kMask = kSize - 1; 9343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen CacheEntry entries_[kSize]; 9443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 9543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 9643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenclass UnicodeData { 9743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen private: 9843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen friend class Test; 9943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static int GetByteCount(); 100ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org static const uchar kMaxCodePoint; 10143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 10243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 103154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgclass Utf16 { 104154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org public: 105154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static inline bool IsLeadSurrogate(int code) { 106154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org if (code == kNoPreviousCharacter) return false; 107154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org return (code & 0xfc00) == 0xd800; 108154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org } 109154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static inline bool IsTrailSurrogate(int code) { 110154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org if (code == 
kNoPreviousCharacter) return false; 111154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org return (code & 0xfc00) == 0xdc00; 112154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org } 113154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org 114154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static inline int CombineSurrogatePair(uchar lead, uchar trail) { 115154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org return 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff); 116154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org } 117154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static const int kNoPreviousCharacter = -1; 118154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static const uchar kMaxNonSurrogateCharCode = 0xffff; 119154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // Encoding a single UTF-16 code unit will produce 1, 2 or 3 bytes 120154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // of UTF-8 data. The special case where the unit is a surrogate 121154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // trail produces 1 byte net, because the encoding of the pair is 122154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // 4 bytes and the 3 bytes that were used to encode the lead surrogate 123154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // can be reclaimed. 124154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static const int kMaxExtraUtf8BytesForOneUtf16CodeUnit = 3; 125154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // One UTF-16 surrogate is endoded (illegally) as 3 UTF-8 bytes. 126154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // The illegality stems from the surrogate not being part of a pair. 
127154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static const int kUtf8BytesToCodeASurrogate = 3; 128a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org static inline uint16_t LeadSurrogate(uint32_t char_code) { 129154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org return 0xd800 + (((char_code - 0x10000) >> 10) & 0x3ff); 130154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org } 131a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org static inline uint16_t TrailSurrogate(uint32_t char_code) { 132154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org return 0xdc00 + (char_code & 0x3ff); 133154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org } 134154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org}; 135154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org 13659297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.orgclass Latin1 { 13759297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org public: 13859297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org static const unsigned kMaxChar = 0xff; 1396bec0093ef661b53a1e338a233d7aafb9536a307mvstanton@chromium.org // Returns 0 if character does not convert to single latin-1 character 1406bec0093ef661b53a1e338a233d7aafb9536a307mvstanton@chromium.org // or if the character doesn't not convert back to latin-1 via inverse 1416bec0093ef661b53a1e338a233d7aafb9536a307mvstanton@chromium.org // operation (upper to lower, etc). 
1426bec0093ef661b53a1e338a233d7aafb9536a307mvstanton@chromium.org static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t); 14359297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org}; 144154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org 14543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenclass Utf8 { 14643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen public: 147154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static inline uchar Length(uchar chr, int previous); 1482bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org static inline unsigned EncodeOneByte(char* out, uint8_t c); 149154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static inline unsigned Encode( 150154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org char* out, uchar c, int previous); 151d88afa260e45de10e729b05a20146184a488aff7erik.corry@gmail.com static uchar CalculateValue(const byte* str, 152d88afa260e45de10e729b05a20146184a488aff7erik.corry@gmail.com unsigned length, 153d88afa260e45de10e729b05a20146184a488aff7erik.corry@gmail.com unsigned* cursor); 15443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static const uchar kBadChar = 0xFFFD; 15543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static const unsigned kMaxEncodedSize = 4; 15643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static const unsigned kMaxOneByteChar = 0x7f; 15743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static const unsigned kMaxTwoByteChar = 0x7ff; 15843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static const unsigned kMaxThreeByteChar = 0xffff; 15943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static const unsigned kMaxFourByteChar = 0x1fffff; 16043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 161154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // A single surrogate is coded as a 3 byte UTF-8 sequence, 
but two together 162154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // that match are coded as a 4 byte UTF-8 sequence. 163154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static const unsigned kBytesSavedByCombiningSurrogates = 2; 164154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org static const unsigned kSizeOfUnmatchedSurrogate = 3; 16543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static inline uchar ValueOf(const byte* str, 16643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen unsigned length, 16743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen unsigned* cursor); 16843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 16943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 17043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 171a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.orgclass Utf8DecoderBase { 17243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen public: 173a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org // Initialization done in subclass. 174a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org inline Utf8DecoderBase(); 175a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org inline Utf8DecoderBase(uint16_t* buffer, 176a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org unsigned buffer_length, 177a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org const uint8_t* stream, 178a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org unsigned stream_length); 179a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org inline unsigned Utf16Length() const { return utf16_length_; } 180a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org protected: 181a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org // This reads all characters and sets the utf16_length_. 
182a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org // The first buffer_length utf16 chars are cached in the buffer. 183a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org void Reset(uint16_t* buffer, 184a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org unsigned buffer_length, 185a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org const uint8_t* stream, 186a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org unsigned stream_length); 187a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org static void WriteUtf16Slow(const uint8_t* stream, 188a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org uint16_t* data, 189a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org unsigned length); 190a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org const uint8_t* unbuffered_start_; 191a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org unsigned utf16_length_; 192a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org bool last_byte_of_buffer_unused_; 193a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org private: 194a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase); 195a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org}; 196a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org 197a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.orgtemplate <unsigned kBufferSize> 198a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.orgclass Utf8Decoder : public Utf8DecoderBase { 199a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org public: 200a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org inline Utf8Decoder() {} 201a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org inline Utf8Decoder(const char* stream, unsigned length); 202a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org inline void Reset(const char* stream, unsigned length); 
203a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org inline unsigned WriteUtf16(uint16_t* data, unsigned length) const; 204a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org private: 205a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org uint16_t buffer_[kBufferSize]; 20643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 20743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 208e90029b96bc4097e0f14d33cc086030d7ad5007awhesse@chromium.org 20943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct Uppercase { 21043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static bool Is(uchar c); 21143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 21243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct Lowercase { 21343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static bool Is(uchar c); 21443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 21543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct Letter { 21643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static bool Is(uchar c); 21743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 21843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct Space { 21943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static bool Is(uchar c); 22043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 22143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct Number { 22243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static bool Is(uchar c); 22343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 22443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct WhiteSpace { 22543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static bool Is(uchar c); 22643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 
22743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct LineTerminator { 22843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static bool Is(uchar c); 22943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 23043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct CombiningMark { 23143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static bool Is(uchar c); 23243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 23343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct ConnectorPunctuation { 23443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static bool Is(uchar c); 23543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 23643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct ToLowercase { 237a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org static const int kMaxWidth = 3; 23843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static int Convert(uchar c, 23943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen uchar n, 24043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen uchar* result, 24143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen bool* allow_caching_ptr); 24243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 24343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct ToUppercase { 244a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org static const int kMaxWidth = 3; 245a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org static int Convert(uchar c, 246a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org uchar n, 247a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org uchar* result, 248a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org bool* allow_caching_ptr); 249a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org}; 
250a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.orgstruct Ecma262Canonicalize { 251a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org static const int kMaxWidth = 1; 252a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org static int Convert(uchar c, 253a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org uchar n, 254a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org uchar* result, 255a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org bool* allow_caching_ptr); 256a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org}; 257a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.orgstruct Ecma262UnCanonicalize { 258a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org static const int kMaxWidth = 4; 259a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org static int Convert(uchar c, 260a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org uchar n, 261a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org uchar* result, 262a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org bool* allow_caching_ptr); 263a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org}; 264a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.orgstruct CanonicalizationRange { 265a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org static const int kMaxWidth = 1; 26643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen static int Convert(uchar c, 26743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen uchar n, 26843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen uchar* result, 26943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen bool* allow_caching_ptr); 27043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen}; 27143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 27243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen} // namespace unibrow 27343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen 
2745ec4892aef9cca42940d7d92302abf674365f6b7ager@chromium.org#endif // V8_UNICODE_H_ 275