1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fxcrt/fx_basic.h"
8
9void CFX_UTF8Decoder::Clear() {
10  m_Buffer.Clear();
11  m_PendingBytes = 0;
12}
13void CFX_UTF8Decoder::AppendChar(uint32_t ch) {
14  m_Buffer.AppendChar((FX_WCHAR)ch);
15}
16void CFX_UTF8Decoder::Input(uint8_t byte) {
17  if (byte < 0x80) {
18    m_PendingBytes = 0;
19    m_Buffer.AppendChar(byte);
20  } else if (byte < 0xc0) {
21    if (m_PendingBytes == 0) {
22      return;
23    }
24    m_PendingBytes--;
25    m_PendingChar |= (byte & 0x3f) << (m_PendingBytes * 6);
26    if (m_PendingBytes == 0) {
27      AppendChar(m_PendingChar);
28    }
29  } else if (byte < 0xe0) {
30    m_PendingBytes = 1;
31    m_PendingChar = (byte & 0x1f) << 6;
32  } else if (byte < 0xf0) {
33    m_PendingBytes = 2;
34    m_PendingChar = (byte & 0x0f) << 12;
35  } else if (byte < 0xf8) {
36    m_PendingBytes = 3;
37    m_PendingChar = (byte & 0x07) << 18;
38  } else if (byte < 0xfc) {
39    m_PendingBytes = 4;
40    m_PendingChar = (byte & 0x03) << 24;
41  } else if (byte < 0xfe) {
42    m_PendingBytes = 5;
43    m_PendingChar = (byte & 0x01) << 30;
44  }
45}
46void CFX_UTF8Encoder::Input(FX_WCHAR unicode) {
47  if ((uint32_t)unicode < 0x80) {
48    m_Buffer.AppendChar(unicode);
49  } else {
50    if ((uint32_t)unicode >= 0x80000000) {
51      return;
52    }
53    int nbytes = 0;
54    if ((uint32_t)unicode < 0x800) {
55      nbytes = 2;
56    } else if ((uint32_t)unicode < 0x10000) {
57      nbytes = 3;
58    } else if ((uint32_t)unicode < 0x200000) {
59      nbytes = 4;
60    } else if ((uint32_t)unicode < 0x4000000) {
61      nbytes = 5;
62    } else {
63      nbytes = 6;
64    }
65    static uint8_t prefix[] = {0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
66    int order = 1 << ((nbytes - 1) * 6);
67    int code = unicode;
68    m_Buffer.AppendChar(prefix[nbytes - 2] | (code / order));
69    for (int i = 0; i < nbytes - 1; i++) {
70      code = code % order;
71      order >>= 6;
72      m_Buffer.AppendChar(0x80 | (code / order));
73    }
74  }
75}
76
77CFX_ByteString FX_UTF8Encode(const CFX_WideStringC& wsStr) {
78  FX_STRSIZE len = wsStr.GetLength();
79  const FX_WCHAR* pStr = wsStr.c_str();
80  CFX_UTF8Encoder encoder;
81  while (len-- > 0)
82    encoder.Input(*pStr++);
83
84  return CFX_ByteString(encoder.GetResult());
85}
86