1// UTFConvert.cpp
2
3#include "StdAfx.h"
4
5#include "MyTypes.h"
6#include "UTFConvert.h"
7
// Lead-byte boundaries for multi-byte UTF-8 sequences.
// A lead byte c that satisfies kUtf8Limits[n - 1] <= c < kUtf8Limits[n] is
// followed by n continuation bytes; a lead byte >= 0xFC is treated as having 5.
// kUtf8Limits[n - 1] is also the marker value subtracted from (when decoding)
// or added to (when encoding) the lead byte to get/set its payload bits.
static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
9
10bool CheckUTF8(const char *src) throw()
11{
12  for (;;)
13  {
14    Byte c;
15    unsigned numAdds;
16    c = *src++;
17    if (c == 0)
18      return true;
19
20    if (c < 0x80)
21      continue;
22    if (c < 0xC0)
23      return false;
24    for (numAdds = 1; numAdds < 5; numAdds++)
25      if (c < kUtf8Limits[numAdds])
26        break;
27    UInt32 value = (c - kUtf8Limits[numAdds - 1]);
28
29    do
30    {
31      Byte c2 = *src++;
32      if (c2 < 0x80 || c2 >= 0xC0)
33        return false;
34      value <<= 6;
35      value |= (c2 - 0x80);
36    }
37    while (--numAdds);
38
39    if (value >= 0x110000)
40      return false;
41  }
42}
43
44
45static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) throw()
46{
47  size_t destPos = 0, srcPos = 0;
48  for (;;)
49  {
50    Byte c;
51    unsigned numAdds;
52    if (srcPos == srcLen)
53    {
54      *destLen = destPos;
55      return True;
56    }
57    c = (Byte)src[srcPos++];
58
59    if (c < 0x80)
60    {
61      if (dest)
62        dest[destPos] = (wchar_t)c;
63      destPos++;
64      continue;
65    }
66    if (c < 0xC0)
67      break;
68    for (numAdds = 1; numAdds < 5; numAdds++)
69      if (c < kUtf8Limits[numAdds])
70        break;
71    UInt32 value = (c - kUtf8Limits[numAdds - 1]);
72
73    do
74    {
75      Byte c2;
76      if (srcPos == srcLen)
77        break;
78      c2 = (Byte)src[srcPos++];
79      if (c2 < 0x80 || c2 >= 0xC0)
80        break;
81      value <<= 6;
82      value |= (c2 - 0x80);
83    }
84    while (--numAdds);
85
86    if (value < 0x10000)
87    {
88      if (dest)
89        dest[destPos] = (wchar_t)value;
90      destPos++;
91    }
92    else
93    {
94      value -= 0x10000;
95      if (value >= 0x100000)
96        break;
97      if (dest)
98      {
99        dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
100        dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
101      }
102      destPos += 2;
103    }
104  }
105  *destLen = destPos;
106  return False;
107}
108
109static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
110{
111  size_t destPos = 0, srcPos = 0;
112  for (;;)
113  {
114    unsigned numAdds;
115    UInt32 value;
116    if (srcPos == srcLen)
117    {
118      *destLen = destPos;
119      return True;
120    }
121    value = src[srcPos++];
122    if (value < 0x80)
123    {
124      if (dest)
125        dest[destPos] = (char)value;
126      destPos++;
127      continue;
128    }
129    if (value >= 0xD800 && value < 0xE000)
130    {
131      UInt32 c2;
132      if (value >= 0xDC00 || srcPos == srcLen)
133        break;
134      c2 = src[srcPos++];
135      if (c2 < 0xDC00 || c2 >= 0xE000)
136        break;
137      value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
138    }
139    for (numAdds = 1; numAdds < 5; numAdds++)
140      if (value < (((UInt32)1) << (numAdds * 5 + 6)))
141        break;
142    if (dest)
143      dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
144    destPos++;
145    do
146    {
147      numAdds--;
148      if (dest)
149        dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
150      destPos++;
151    }
152    while (numAdds != 0);
153  }
154  *destLen = destPos;
155  return False;
156}
157
158bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
159{
160  dest.Empty();
161  size_t destLen = 0;
162  Utf8_To_Utf16(NULL, &destLen, src, src.Len());
163  Bool res = Utf8_To_Utf16(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len());
164  dest.ReleaseBuffer((unsigned)destLen);
165  return res ? true : false;
166}
167
168bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
169{
170  dest.Empty();
171  size_t destLen = 0;
172  Utf16_To_Utf8(NULL, &destLen, src, src.Len());
173  Bool res = Utf16_To_Utf8(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len());
174  dest.ReleaseBuffer((unsigned)destLen);
175  return res ? true : false;
176}
177