// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef V8_UNICODE_H_
#define V8_UNICODE_H_

#include <sys/types.h>
#include <globals.h>
/**
 * \file
 * Definitions and convenience functions for working with unicode.
 */

namespace unibrow {

// Basic value types used by the declarations in this header.
typedef unsigned int uchar;  // A single character / code point value.
typedef unsigned char byte;  // A single raw byte of encoded input.

/**
 * The max length of the result of converting the case of a single
 * character.
 */
const int kMaxMappingSize = 4;

4943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansentemplate <class T, int size = 256>
5043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenclass Predicate {
5143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen public:
5243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  inline Predicate() { }
5343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  inline bool get(uchar c);
5443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen private:
5543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  friend class Test;
5643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  bool CalculateValue(uchar c);
5743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  struct CacheEntry {
5843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen    inline CacheEntry() : code_point_(0), value_(0) { }
5943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen    inline CacheEntry(uchar code_point, bool value)
6043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen      : code_point_(code_point),
6143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen        value_(value) { }
6243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen    uchar code_point_ : 21;
6343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen    bool value_ : 1;
6443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  };
6543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static const int kSize = size;
6643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static const int kMask = kSize - 1;
6743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  CacheEntry entries_[kSize];
6843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
6943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen
7043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen// A cache used in case conversion.  It caches the value for characters
7143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen// that either have no mapping or map to a single character independent
7243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen// of context.  Characters that map to more than one character or that
7343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen// map differently depending on context are always looked up.
7443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansentemplate <class T, int size = 256>
7543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenclass Mapping {
7643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen public:
7743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  inline Mapping() { }
7843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  inline int get(uchar c, uchar n, uchar* result);
7943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen private:
8043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  friend class Test;
8143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  int CalculateValue(uchar c, uchar n, uchar* result);
8243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  struct CacheEntry {
83a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org    inline CacheEntry() : code_point_(kNoChar), offset_(0) { }
8443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen    inline CacheEntry(uchar code_point, signed offset)
8543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen      : code_point_(code_point),
8643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen        offset_(offset) { }
87a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org    uchar code_point_;
88a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org    signed offset_;
89a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org    static const int kNoChar = (1 << 21) - 1;
9043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  };
9143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static const int kSize = size;
9243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static const int kMask = kSize - 1;
9343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  CacheEntry entries_[kSize];
9443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
9543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen
9643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenclass UnicodeData {
9743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen private:
9843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  friend class Test;
9943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static int GetByteCount();
100ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  static const uchar kMaxCodePoint;
10143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
10243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen
103154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgclass Utf16 {
104154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org public:
105154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static inline bool IsLeadSurrogate(int code) {
106154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org    if (code == kNoPreviousCharacter) return false;
107154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org    return (code & 0xfc00) == 0xd800;
108154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  }
109154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static inline bool IsTrailSurrogate(int code) {
110154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org    if (code == kNoPreviousCharacter) return false;
111154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org    return (code & 0xfc00) == 0xdc00;
112154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  }
113154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org
114154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static inline int CombineSurrogatePair(uchar lead, uchar trail) {
115154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org    return 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff);
116154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  }
117154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static const int kNoPreviousCharacter = -1;
118154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static const uchar kMaxNonSurrogateCharCode = 0xffff;
119154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  // Encoding a single UTF-16 code unit will produce 1, 2 or 3 bytes
120154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  // of UTF-8 data.  The special case where the unit is a surrogate
121154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  // trail produces 1 byte net, because the encoding of the pair is
122154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  // 4 bytes and the 3 bytes that were used to encode the lead surrogate
123154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  // can be reclaimed.
124154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static const int kMaxExtraUtf8BytesForOneUtf16CodeUnit = 3;
125154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  // One UTF-16 surrogate is endoded (illegally) as 3 UTF-8 bytes.
126154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  // The illegality stems from the surrogate not being part of a pair.
127154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static const int kUtf8BytesToCodeASurrogate = 3;
128a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  static inline uint16_t LeadSurrogate(uint32_t char_code) {
129154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org    return 0xd800 + (((char_code - 0x10000) >> 10) & 0x3ff);
130154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  }
131a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  static inline uint16_t TrailSurrogate(uint32_t char_code) {
132154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org    return 0xdc00 + (char_code & 0x3ff);
133154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  }
134154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org};
135154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org
// Constants and helpers for the Latin-1 character range.
class Latin1 {
 public:
  // Largest character value in the Latin-1 range.
  static const unsigned kMaxChar = 0xff;
  // Returns 0 if the character does not convert to a single latin-1
  // character or if the character does not convert back to latin-1 via
  // the inverse operation (upper to lower, etc).
  static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t);
};

14543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenclass Utf8 {
14643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen public:
147154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static inline uchar Length(uchar chr, int previous);
1482bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org  static inline unsigned EncodeOneByte(char* out, uint8_t c);
149154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static inline unsigned Encode(
150154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org      char* out, uchar c, int previous);
151d88afa260e45de10e729b05a20146184a488aff7erik.corry@gmail.com  static uchar CalculateValue(const byte* str,
152d88afa260e45de10e729b05a20146184a488aff7erik.corry@gmail.com                              unsigned length,
153d88afa260e45de10e729b05a20146184a488aff7erik.corry@gmail.com                              unsigned* cursor);
15443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static const uchar kBadChar = 0xFFFD;
15543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static const unsigned kMaxEncodedSize   = 4;
15643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static const unsigned kMaxOneByteChar   = 0x7f;
15743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static const unsigned kMaxTwoByteChar   = 0x7ff;
15843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static const unsigned kMaxThreeByteChar = 0xffff;
15943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static const unsigned kMaxFourByteChar  = 0x1fffff;
16043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen
161154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together
162154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  // that match are coded as a 4 byte UTF-8 sequence.
163154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static const unsigned kBytesSavedByCombiningSurrogates = 2;
164154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org  static const unsigned kSizeOfUnmatchedSurrogate = 3;
16543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static inline uchar ValueOf(const byte* str,
16643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen                              unsigned length,
16743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen                              unsigned* cursor);
16843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
16943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen
17043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen
171a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.orgclass Utf8DecoderBase {
17243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen public:
173a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  // Initialization done in subclass.
174a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  inline Utf8DecoderBase();
175a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  inline Utf8DecoderBase(uint16_t* buffer,
176a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org                         unsigned buffer_length,
177a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org                         const uint8_t* stream,
178a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org                         unsigned stream_length);
179a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  inline unsigned Utf16Length() const { return utf16_length_; }
180a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org protected:
181a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  // This reads all characters and sets the utf16_length_.
182a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  // The first buffer_length utf16 chars are cached in the buffer.
183a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  void Reset(uint16_t* buffer,
184a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org             unsigned buffer_length,
185a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org             const uint8_t* stream,
186a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org             unsigned stream_length);
187a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  static void WriteUtf16Slow(const uint8_t* stream,
188a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org                             uint16_t* data,
189a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org                             unsigned length);
190a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  const uint8_t* unbuffered_start_;
191a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  unsigned utf16_length_;
192a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  bool last_byte_of_buffer_unused_;
193a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org private:
194a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
195a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org};
196a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org
197a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.orgtemplate <unsigned kBufferSize>
198a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.orgclass Utf8Decoder : public Utf8DecoderBase {
199a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org public:
200a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  inline Utf8Decoder() {}
201a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  inline Utf8Decoder(const char* stream, unsigned length);
202a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  inline void Reset(const char* stream, unsigned length);
203a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  inline unsigned WriteUtf16(uint16_t* data, unsigned length) const;
204a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org private:
205a6bbcc801f63c451f814d6da77a1a48fba3d36c6yangguo@chromium.org  uint16_t buffer_[kBufferSize];
20643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
20743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen
208e90029b96bc4097e0f14d33cc086030d7ad5007awhesse@chromium.org
20943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct Uppercase {
21043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static bool Is(uchar c);
21143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
21243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct Lowercase {
21343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static bool Is(uchar c);
21443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
21543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct Letter {
21643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static bool Is(uchar c);
21743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
21843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct Space {
21943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static bool Is(uchar c);
22043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
22143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct Number {
22243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static bool Is(uchar c);
22343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
22443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct WhiteSpace {
22543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static bool Is(uchar c);
22643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
22743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct LineTerminator {
22843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static bool Is(uchar c);
22943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
23043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct CombiningMark {
23143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static bool Is(uchar c);
23243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
23343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct ConnectorPunctuation {
23443d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static bool Is(uchar c);
23543d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
23643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct ToLowercase {
237a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org  static const int kMaxWidth = 3;
23843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static int Convert(uchar c,
23943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen                     uchar n,
24043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen                     uchar* result,
24143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen                     bool* allow_caching_ptr);
24243d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
24343d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansenstruct ToUppercase {
244a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org  static const int kMaxWidth = 3;
245a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org  static int Convert(uchar c,
246a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org                     uchar n,
247a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org                     uchar* result,
248a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org                     bool* allow_caching_ptr);
249a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org};
250a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.orgstruct Ecma262Canonicalize {
251a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org  static const int kMaxWidth = 1;
252a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org  static int Convert(uchar c,
253a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org                     uchar n,
254a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org                     uchar* result,
255a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org                     bool* allow_caching_ptr);
256a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org};
257a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.orgstruct Ecma262UnCanonicalize {
258a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org  static const int kMaxWidth = 4;
259a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org  static int Convert(uchar c,
260a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org                     uchar n,
261a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org                     uchar* result,
262a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org                     bool* allow_caching_ptr);
263a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org};
264a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.orgstruct CanonicalizationRange {
265a74f0daeb278665869b4b6a3bc2739e88fed93b1ager@chromium.org  static const int kMaxWidth = 1;
26643d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen  static int Convert(uchar c,
26743d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen                     uchar n,
26843d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen                     uchar* result,
26943d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen                     bool* allow_caching_ptr);
27043d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen};
27143d26ecc3563a46f62a0224030667c8f8f3f6cebchristian.plesner.hansen
}  // namespace unibrow

#endif  // V8_UNICODE_H_