1// Copyright 2014 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7#include "../../include/fxcrt/fx_basic.h" 8void CFX_UTF8Decoder::Clear() 9{ 10 m_Buffer.Clear(); 11 m_PendingBytes = 0; 12} 13void CFX_UTF8Decoder::AppendChar(FX_DWORD ch) 14{ 15 m_Buffer.AppendChar((FX_WCHAR)ch); 16} 17void CFX_UTF8Decoder::Input(FX_BYTE byte) 18{ 19 if (byte < 0x80) { 20 m_PendingBytes = 0; 21 m_Buffer.AppendChar(byte); 22 } else if (byte < 0xc0) { 23 if (m_PendingBytes == 0) { 24 return; 25 } 26 m_PendingBytes --; 27 m_PendingChar |= (byte & 0x3f) << (m_PendingBytes * 6); 28 if (m_PendingBytes == 0) { 29 AppendChar(m_PendingChar); 30 } 31 } else if (byte < 0xe0) { 32 m_PendingBytes = 1; 33 m_PendingChar = (byte & 0x1f) << 6; 34 } else if (byte < 0xf0) { 35 m_PendingBytes = 2; 36 m_PendingChar = (byte & 0x0f) << 12; 37 } else if (byte < 0xf8) { 38 m_PendingBytes = 3; 39 m_PendingChar = (byte & 0x07) << 18; 40 } else if (byte < 0xfc) { 41 m_PendingBytes = 4; 42 m_PendingChar = (byte & 0x03) << 24; 43 } else if (byte < 0xfe) { 44 m_PendingBytes = 5; 45 m_PendingChar = (byte & 0x01) << 30; 46 } 47} 48void CFX_UTF8Encoder::Input(FX_WCHAR unicode) 49{ 50 if ((FX_DWORD)unicode < 0x80) { 51 m_Buffer.AppendChar(unicode); 52 } else { 53 if ((FX_DWORD)unicode >= 0x80000000) { 54 return; 55 } 56 int nbytes = 0; 57 if ((FX_DWORD)unicode < 0x800) { 58 nbytes = 2; 59 } else if ((FX_DWORD)unicode < 0x10000) { 60 nbytes = 3; 61 } else if ((FX_DWORD)unicode < 0x200000) { 62 nbytes = 4; 63 } else if ((FX_DWORD)unicode < 0x4000000) { 64 nbytes = 5; 65 } else { 66 nbytes = 6; 67 } 68 static FX_BYTE prefix[] = {0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; 69 int order = 1 << ((nbytes - 1) * 6); 70 int code = unicode; 71 m_Buffer.AppendChar(prefix[nbytes - 2] | (code / order)); 72 for (int i = 0; i < nbytes - 1; i ++) { 73 code = code % order; 74 order >>= 6; 75 m_Buffer.AppendChar(0x80 | (code / order)); 76 } 77 } 78} 79CFX_ByteString FX_UTF8Encode(FX_LPCWSTR pwsStr, FX_STRSIZE len) 80{ 81 FXSYS_assert(pwsStr != NULL); 82 if (len < 0) { 83 len = (FX_STRSIZE)FXSYS_wcslen(pwsStr); 84 } 85 CFX_UTF8Encoder encoder; 86 while (len -- > 0) { 87 encoder.Input(*pwsStr ++); 88 } 89 return encoder.GetResult(); 90} 91void FX_UTF8Encode(FX_LPCWSTR pwsStr, FX_STRSIZE len, CFX_ByteStringL &utf8Str, IFX_Allocator* pAllocator) 92{ 93 FXSYS_assert(pwsStr != NULL); 94 if (len < 0) { 95 len = (FX_STRSIZE)FXSYS_wcslen(pwsStr); 96 } 97 CFX_UTF8Encoder encoder(pAllocator); 98 while (len -- > 0) { 99 encoder.Input(*pwsStr ++); 100 } 101 encoder.GetResult(utf8Str); 102} 103