1// UTFConvert.cpp
2
3#include "StdAfx.h"
4
5#include "UTFConvert.h"
6#include "Types.h"
7
8static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
9
10static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen)
11{
12  size_t destPos = 0, srcPos = 0;
13  for (;;)
14  {
15    Byte c;
16    int numAdds;
17    if (srcPos == srcLen)
18    {
19      *destLen = destPos;
20      return True;
21    }
22    c = (Byte)src[srcPos++];
23
24    if (c < 0x80)
25    {
26      if (dest)
27        dest[destPos] = (wchar_t)c;
28      destPos++;
29      continue;
30    }
31    if (c < 0xC0)
32      break;
33    for (numAdds = 1; numAdds < 5; numAdds++)
34      if (c < kUtf8Limits[numAdds])
35        break;
36    UInt32 value = (c - kUtf8Limits[numAdds - 1]);
37
38    do
39    {
40      Byte c2;
41      if (srcPos == srcLen)
42        break;
43      c2 = (Byte)src[srcPos++];
44      if (c2 < 0x80 || c2 >= 0xC0)
45        break;
46      value <<= 6;
47      value |= (c2 - 0x80);
48    }
49    while (--numAdds != 0);
50
51    if (value < 0x10000)
52    {
53      if (dest)
54        dest[destPos] = (wchar_t)value;
55      destPos++;
56    }
57    else
58    {
59      value -= 0x10000;
60      if (value >= 0x100000)
61        break;
62      if (dest)
63      {
64        dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
65        dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
66      }
67      destPos += 2;
68    }
69  }
70  *destLen = destPos;
71  return False;
72}
73
74static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
75{
76  size_t destPos = 0, srcPos = 0;
77  for (;;)
78  {
79    unsigned numAdds;
80    UInt32 value;
81    if (srcPos == srcLen)
82    {
83      *destLen = destPos;
84      return True;
85    }
86    value = src[srcPos++];
87    if (value < 0x80)
88    {
89      if (dest)
90        dest[destPos] = (char)value;
91      destPos++;
92      continue;
93    }
94    if (value >= 0xD800 && value < 0xE000)
95    {
96      UInt32 c2;
97      if (value >= 0xDC00 || srcPos == srcLen)
98        break;
99      c2 = src[srcPos++];
100      if (c2 < 0xDC00 || c2 >= 0xE000)
101        break;
102      value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
103    }
104    for (numAdds = 1; numAdds < 5; numAdds++)
105      if (value < (((UInt32)1) << (numAdds * 5 + 6)))
106        break;
107    if (dest)
108      dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
109    destPos++;
110    do
111    {
112      numAdds--;
113      if (dest)
114        dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
115      destPos++;
116    }
117    while (numAdds != 0);
118  }
119  *destLen = destPos;
120  return False;
121}
122
123bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
124{
125  dest.Empty();
126  size_t destLen = 0;
127  Utf8_To_Utf16(NULL, &destLen, src, src.Length());
128  wchar_t *p = dest.GetBuffer((int)destLen);
129  Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length());
130  p[destLen] = 0;
131  dest.ReleaseBuffer();
132  return res ? true : false;
133}
134
135bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
136{
137  dest.Empty();
138  size_t destLen = 0;
139  Utf16_To_Utf8(NULL, &destLen, src, src.Length());
140  char *p = dest.GetBuffer((int)destLen);
141  Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length());
142  p[destLen] = 0;
143  dest.ReleaseBuffer();
144  return res ? true : false;
145}
146