/*
 * _codecs_kr.c: Codecs collection for Korean encodings
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6
7#include "cjkcodecs.h"
8#include "mappings_kr.h"
9
/*
 * EUC-KR codec
 */
13
/* Byte values shared by the EUC-KR encoder and decoder for the 8-byte
 * make-up sequence: every jamo code in the sequence starts with
 * EUCKR_JAMO_FIRSTBYTE, and the sequence opens with the pair
 * (EUCKR_JAMO_FIRSTBYTE, EUCKR_JAMO_FILLER).  The filler trail byte also
 * stands for "no final consonant" in the last position. */
enum {
    EUCKR_JAMO_FIRSTBYTE = 0xA4,
    EUCKR_JAMO_FILLER    = 0xD4
};
16
/* Trail byte of the jamo code written for the initial consonant of a
 * Hangul syllable in the EUC-KR make-up sequence.  The index is the
 * syllable offset (code point - 0xac00) divided by 588. */
static const unsigned char u2cgk_choseong[19] = {
    /*  0.. 5 */ 0xA1, 0xA2, 0xA4, 0xA7, 0xA8, 0xA9,
    /*  6..11 */ 0xB1, 0xB2, 0xB3, 0xB5, 0xB6, 0xB7,
    /* 12..18 */ 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE
};
/* Trail byte of the jamo code written for the vowel of a Hangul
 * syllable in the EUC-KR make-up sequence.  The index is
 * ((code point - 0xac00) / 28) % 21; the values run consecutively from
 * 0xBF to 0xD3. */
static const unsigned char u2cgk_jungseong[21] = {
    /*  0.. 6 */ 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5,
    /*  7..13 */ 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC,
    /* 14..20 */ 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3
};
/* Trail byte of the jamo code written for the final consonant of a
 * Hangul syllable in the EUC-KR make-up sequence.  The index is
 * (code point - 0xac00) % 28; index 0 (no final consonant) is encoded
 * with the filler byte 0xD4. */
static const unsigned char u2cgk_jongseong[28] = {
    /*  0.. 6 */ 0xD4, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
    /*  7..13 */ 0xA7, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE,
    /* 14..20 */ 0xAF, 0xB0, 0xB1, 0xB2, 0xB4, 0xB5, 0xB6,
    /* 21..27 */ 0xB7, 0xB8, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE
};
33
34ENCODER(euc_kr)
35{
36    while (inleft > 0) {
37        Py_UNICODE c = IN1;
38        DBCHAR code;
39
40        if (c < 0x80) {
41            WRITE1((unsigned char)c)
42            NEXT(1, 1)
43            continue;
44        }
45        UCS4INVALID(c)
46
47        REQUIRE_OUTBUF(2)
48        TRYMAP_ENC(cp949, code, c);
49        else return 1;
50
51        if ((code & 0x8000) == 0) {
52            /* KS X 1001 coded character */
53            OUT1((code >> 8) | 0x80)
54            OUT2((code & 0xFF) | 0x80)
55            NEXT(1, 2)
56        }
57        else {          /* Mapping is found in CP949 extension,
58                 * but we encode it in KS X 1001:1998 Annex 3,
59                 * make-up sequence for EUC-KR. */
60
61            REQUIRE_OUTBUF(8)
62
63            /* syllable composition precedence */
64            OUT1(EUCKR_JAMO_FIRSTBYTE)
65            OUT2(EUCKR_JAMO_FILLER)
66
67            /* All codepoints in CP949 extension are in unicode
68             * Hangul Syllable area. */
69            assert(0xac00 <= c && c <= 0xd7a3);
70            c -= 0xac00;
71
72            OUT3(EUCKR_JAMO_FIRSTBYTE)
73            OUT4(u2cgk_choseong[c / 588])
74            NEXT_OUT(4)
75
76            OUT1(EUCKR_JAMO_FIRSTBYTE)
77            OUT2(u2cgk_jungseong[(c / 28) % 21])
78            OUT3(EUCKR_JAMO_FIRSTBYTE)
79            OUT4(u2cgk_jongseong[c % 28])
80            NEXT(1, 4)
81        }
82    }
83
84    return 0;
85}
86
/* Marks a jamo trail byte that has no index in the table below. */
#define NONE    127

/* Inverse lookups for the EUC-KR make-up sequence.  Both tables are
 * indexed by (trail byte - 0xA1) for trail bytes in [0xA1, 0xBE]. */

/* trail byte -> initial-consonant index (0..18), or NONE */
static const unsigned char cgk2u_choseong[0xBE - 0xA1 + 1] = {
    /* A1..A6 */    0,    1, NONE,    2, NONE, NONE,
    /* A7..AC */    3,    4,    5, NONE, NONE, NONE,
    /* AD..B2 */ NONE, NONE, NONE, NONE,    6,    7,
    /* B3..B8 */    8, NONE,    9,   10,   11,   12,
    /* B9..BE */   13,   14,   15,   16,   17,   18
};

/* trail byte -> final-consonant index (1..27), or NONE */
static const unsigned char cgk2u_jongseong[0xBE - 0xA1 + 1] = {
    /* A1..A6 */    1,    2,    3,    4,    5,    6,
    /* A7..AC */    7, NONE,    8,    9,   10,   11,
    /* AD..B2 */   12,   13,   14,   15,   16,   17,
    /* B3..B8 */ NONE,   18,   19,   20,   21,   22,
    /* B9..BE */ NONE,   23,   24,   25,   26,   27
};
101
102DECODER(euc_kr)
103{
104    while (inleft > 0) {
105        unsigned char c = IN1;
106
107        REQUIRE_OUTBUF(1)
108
109        if (c < 0x80) {
110            OUT1(c)
111            NEXT(1, 1)
112            continue;
113        }
114
115        REQUIRE_INBUF(2)
116
117        if (c == EUCKR_JAMO_FIRSTBYTE &&
118            IN2 == EUCKR_JAMO_FILLER) {
119            /* KS X 1001:1998 Annex 3 make-up sequence */
120            DBCHAR cho, jung, jong;
121
122            REQUIRE_INBUF(8)
123            if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
124                (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
125                (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
126                return 8;
127
128            c = (*inbuf)[3];
129            if (0xa1 <= c && c <= 0xbe)
130                cho = cgk2u_choseong[c - 0xa1];
131            else
132                cho = NONE;
133
134            c = (*inbuf)[5];
135            jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
136
137            c = (*inbuf)[7];
138            if (c == EUCKR_JAMO_FILLER)
139                jong = 0;
140            else if (0xa1 <= c && c <= 0xbe)
141                jong = cgk2u_jongseong[c - 0xa1];
142            else
143                jong = NONE;
144
145            if (cho == NONE || jung == NONE || jong == NONE)
146                return 8;
147
148            OUT1(0xac00 + cho*588 + jung*28 + jong);
149            NEXT(8, 1)
150        }
151        else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
152            NEXT(2, 1)
153        }
154        else
155            return 2;
156    }
157
158    return 0;
159}
160#undef NONE
161
162
/*
 * CP949 codec
 */
166
167ENCODER(cp949)
168{
169    while (inleft > 0) {
170        Py_UNICODE c = IN1;
171        DBCHAR code;
172
173        if (c < 0x80) {
174            WRITE1((unsigned char)c)
175            NEXT(1, 1)
176            continue;
177        }
178        UCS4INVALID(c)
179
180        REQUIRE_OUTBUF(2)
181        TRYMAP_ENC(cp949, code, c);
182        else return 1;
183
184        OUT1((code >> 8) | 0x80)
185        if (code & 0x8000)
186            OUT2(code & 0xFF) /* MSB set: CP949 */
187        else
188            OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
189        NEXT(1, 2)
190    }
191
192    return 0;
193}
194
195DECODER(cp949)
196{
197    while (inleft > 0) {
198        unsigned char c = IN1;
199
200        REQUIRE_OUTBUF(1)
201
202        if (c < 0x80) {
203            OUT1(c)
204            NEXT(1, 1)
205            continue;
206        }
207
208        REQUIRE_INBUF(2)
209        TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
210        else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
211        else return 2;
212
213        NEXT(2, 1)
214    }
215
216    return 0;
217}
218
219
/*
 * JOHAB codec
 */
223
/* 5-bit johab field for the initial consonant of a Hangul syllable,
 * indexed by (code point - 0xac00) / 588.  Only the first 19 entries
 * are ever indexed by the encoder; the rest of the 32 slots are
 * zero-initialized. */
static const unsigned char u2johabidx_choseong[32] = {
    /*  0.. 7 */ 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
    /*  8..15 */ 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
    /* 16..18 */ 0x12, 0x13, 0x14,
};
/* 5-bit johab field for the vowel of a Hangul syllable, indexed by
 * ((code point - 0xac00) / 28) % 21.  Only the first 21 entries are
 * ever indexed by the encoder; the rest of the 32 slots are
 * zero-initialized. */
static const unsigned char u2johabidx_jungseong[32] = {
    /*  0.. 6 */ 0x03, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b,
    /*  7..13 */ 0x0c, 0x0d, 0x0e, 0x0f, 0x12, 0x13, 0x14,
    /* 14..20 */ 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x1c, 0x1d,
};
/* 5-bit johab field for the final consonant of a Hangul syllable,
 * indexed by (code point - 0xac00) % 28.  The value 0x12 is skipped.
 * Only the first 28 entries are ever indexed by the encoder; the rest
 * of the 32 slots are zero-initialized. */
static const unsigned char u2johabidx_jongseong[32] = {
    /*  0.. 6 */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    /*  7..13 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    /* 14..20 */ 0x0f, 0x10, 0x11, 0x13, 0x14, 0x15, 0x16,
    /* 21..27 */ 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
241static const DBCHAR u2johabjamo[] = {
242            0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
243    0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
244    0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
245    0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
246    0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
247    0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
248    0x8741, 0x8761, 0x8781, 0x87a1,
249};
250
251ENCODER(johab)
252{
253    while (inleft > 0) {
254        Py_UNICODE c = IN1;
255        DBCHAR code;
256
257        if (c < 0x80) {
258            WRITE1((unsigned char)c)
259            NEXT(1, 1)
260            continue;
261        }
262        UCS4INVALID(c)
263
264        REQUIRE_OUTBUF(2)
265
266        if (c >= 0xac00 && c <= 0xd7a3) {
267            c -= 0xac00;
268            code = 0x8000 |
269                (u2johabidx_choseong[c / 588] << 10) |
270                (u2johabidx_jungseong[(c / 28) % 21] << 5) |
271                u2johabidx_jongseong[c % 28];
272        }
273        else if (c >= 0x3131 && c <= 0x3163)
274            code = u2johabjamo[c - 0x3131];
275        else TRYMAP_ENC(cp949, code, c) {
276            unsigned char c1, c2, t2;
277            unsigned short t1;
278
279            assert((code & 0x8000) == 0);
280            c1 = code >> 8;
281            c2 = code & 0xff;
282            if (((c1 >= 0x21 && c1 <= 0x2c) ||
283                (c1 >= 0x4a && c1 <= 0x7d)) &&
284                (c2 >= 0x21 && c2 <= 0x7e)) {
285                t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
286                          (c1 - 0x21 + 0x197));
287                t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
288                OUT1(t1 >> 1)
289                OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
290                NEXT(1, 2)
291                continue;
292            }
293            else
294                return 1;
295        }
296        else
297            return 1;
298
299        OUT1(code >> 8)
300        OUT2(code & 0xff)
301        NEXT(1, 2)
302    }
303
304    return 0;
305}
306
/* Sentinels stored in the johab decode tables below:
 * FILL - the 5-bit field says "this component is absent";
 * NONE - the 5-bit field value is not valid. */
#define FILL 0xfd
#define NONE 0xff

/* johabidx_*: 5-bit field of a johab Hangul code -> component index
 * used to rebuild the Unicode syllable (or FILL / NONE).  The comment
 * at the start of each row is the field value of its first entry. */
static const unsigned char johabidx_choseong[32] = {
    /* 00 */ NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
    /* 08 */ 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
    /* 10 */ 0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
    /* 18 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
    /* 00 */ NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
    /* 08 */ NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
    /* 10 */ NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
    /* 18 */ NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
    /* 00 */ NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    /* 08 */ 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    /* 10 */ 0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
    /* 18 */ 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};

/* johabjamo_*: 5-bit field of a johab Hangul code -> low byte of the
 * U+31xx code point emitted when that component stands alone (the
 * decoder ORs the value with 0x3100). */
static const unsigned char johabjamo_choseong[32] = {
    /* 00 */ NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
    /* 08 */ 0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
    /* 10 */ 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
    /* 18 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
    /* 00 */ NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
    /* 08 */ NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    /* 10 */ NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    /* 18 */ NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
    /* 00 */ NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
    /* 08 */ 0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    /* 10 */ 0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
    /* 18 */ 0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};
347
348DECODER(johab)
349{
350    while (inleft > 0) {
351        unsigned char    c = IN1, c2;
352
353        REQUIRE_OUTBUF(1)
354
355        if (c < 0x80) {
356            OUT1(c)
357            NEXT(1, 1)
358            continue;
359        }
360
361        REQUIRE_INBUF(2)
362        c2 = IN2;
363
364        if (c < 0xd8) {
365            /* johab hangul */
366            unsigned char c_cho, c_jung, c_jong;
367            unsigned char i_cho, i_jung, i_jong;
368
369            c_cho = (c >> 2) & 0x1f;
370            c_jung = ((c << 3) | c2 >> 5) & 0x1f;
371            c_jong = c2 & 0x1f;
372
373            i_cho = johabidx_choseong[c_cho];
374            i_jung = johabidx_jungseong[c_jung];
375            i_jong = johabidx_jongseong[c_jong];
376
377            if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
378                return 2;
379
380            /* we don't use U+1100 hangul jamo yet. */
381            if (i_cho == FILL) {
382                if (i_jung == FILL) {
383                    if (i_jong == FILL)
384                        OUT1(0x3000)
385                    else
386                        OUT1(0x3100 |
387                          johabjamo_jongseong[c_jong])
388                }
389                else {
390                    if (i_jong == FILL)
391                        OUT1(0x3100 |
392                          johabjamo_jungseong[c_jung])
393                    else
394                        return 2;
395                }
396            } else {
397                if (i_jung == FILL) {
398                    if (i_jong == FILL)
399                        OUT1(0x3100 |
400                          johabjamo_choseong[c_cho])
401                    else
402                        return 2;
403                }
404                else
405                    OUT1(0xac00 +
406                         i_cho * 588 +
407                         i_jung * 28 +
408                         (i_jong == FILL ? 0 : i_jong))
409            }
410            NEXT(2, 1)
411        } else {
412            /* KS X 1001 except hangul jamos and syllables */
413            if (c == 0xdf || c > 0xf9 ||
414                c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
415                (c2 & 0x7f) == 0x7f ||
416                (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
417                return 2;
418            else {
419                unsigned char t1, t2;
420
421                t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
422                         2 * c - 0x197);
423                t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
424                t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
425                t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
426
427                TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
428                else return 2;
429                NEXT(2, 1)
430            }
431        }
432    }
433
434    return 0;
435}
436#undef NONE
437#undef FILL
438
439
/* Mapping tables used by the codecs in this file: ksx1001 and cp949ext
 * are only consulted through TRYMAP_DEC, cp949 only through TRYMAP_ENC.
 * NOTE(review): the list macros themselves are defined outside this
 * file; presumably they build the table the module exports -- confirm. */
BEGIN_MAPPINGS_LIST
  MAPPING_DECONLY(ksx1001)
  MAPPING_ENCONLY(cp949)
  MAPPING_DECONLY(cp949ext)
END_MAPPINGS_LIST
445
/* One entry per ENCODER/DECODER pair defined above (euc_kr, cp949,
 * johab).  NOTE(review): CODEC_STATELESS is defined outside this file;
 * presumably it registers a codec that keeps no state -- confirm. */
BEGIN_CODECS_LIST
  CODEC_STATELESS(euc_kr)
  CODEC_STATELESS(cp949)
  CODEC_STATELESS(johab)
END_CODECS_LIST
451
/* NOTE(review): macro defined outside this file; presumably it expands
 * to the module initialization code for the "kr" codec set -- confirm. */
I_AM_A_MODULE_FOR(kr)
453