/*
 * _codecs_kr.c: Codecs collection for Korean encodings
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 *
 * Three codecs are defined here: euc_kr, cp949 and johab.  All of them are
 * written in terms of macros (ENCODER, DECODER, INCHAR1, INBYTE1/2,
 * OUTBYTE1..4, WRITEBYTE1, OUTCHAR, NEXT, NEXT_IN, NEXT_OUT, REQUIRE_INBUF,
 * REQUIRE_OUTBUF, TRYMAP_ENC, TRYMAP_DEC, ...) that come from cjkcodecs.h,
 * and of mapping tables that come from mappings_kr.h.  Neither header is
 * shown in this file, so the comments below describe only what the code in
 * this file visibly does with them.
 *
 * NOTE(review): every codec body uses "return 1" on input it cannot handle
 * and "return 0" after the loop finishes; the exact meaning of those values
 * to the caller is defined by cjkcodecs.h -- confirm there.
 */

#include "cjkcodecs.h"
#include "mappings_kr.h"

/*
 * EUC-KR codec
 */

/* Lead byte used for every two-byte unit of the 8-byte make-up sequence. */
#define EUCKR_JAMO_FIRSTBYTE    0xA4
/* Second byte of the sequence header; also accepted by the decoder as the
 * final-consonant byte meaning "no final consonant" (jong = 0). */
#define EUCKR_JAMO_FILLER       0xD4

/*
 * Encoder-side tables: index of an initial / medial / final component of a
 * Hangul syllable -> second byte of the corresponding (0xA4, xx) pair.
 * A syllable offset s = c - 0xac00 is split as
 *     initial = s / 588,  medial = (s / 28) % 21,  final = s % 28
 * which is why the tables have 19, 21 and 28 entries respectively.
 */
static const unsigned char u2cgk_choseong[19] = {
    0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
    0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
    0xbc, 0xbd, 0xbe
};
static const unsigned char u2cgk_jungseong[21] = {
    0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
    0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
    0xcf, 0xd0, 0xd1, 0xd2, 0xd3
};
/* Entry 0 (no final consonant) is the filler byte 0xd4. */
static const unsigned char u2cgk_jongseong[28] = {
    0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
    0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
    0xbb, 0xbc, 0xbd, 0xbe
};

/*
 * EUC-KR encoder.
 *
 * Per input character:
 *   - c < 0x80 is written as a single byte;
 *   - c > 0xFFFF, or a character with no entry in the cp949 encode map,
 *     ends the function with "return 1";
 *   - a mapped code with bit 0x8000 clear is written as two bytes, each
 *     with 0x80 OR-ed in;
 *   - a mapped code with bit 0x8000 set is written as an 8-byte make-up
 *     sequence: A4 D4, A4 <initial>, A4 <medial>, A4 <final>.
 */
ENCODER(euc_kr)
{
    while (*inpos < inlen) {
        Py_UCS4 c = INCHAR1;
        DBCHAR code;

        if (c < 0x80) {
            WRITEBYTE1((unsigned char)c);
            NEXT(1, 1);
            continue;
        }

        if (c > 0xFFFF)
            return 1;

        REQUIRE_OUTBUF(2);
        if (TRYMAP_ENC(cp949, code, c))
            ;
        else
            return 1;

        if ((code & 0x8000) == 0) {
            /* KS X 1001 coded character */
            OUTBYTE1((code >> 8) | 0x80);
            OUTBYTE2((code & 0xFF) | 0x80);
            NEXT(1, 2);
        }
        else {
            /* Mapping is found in CP949 extension,
               but we encode it in KS X 1001:1998 Annex 3,
               make-up sequence for EUC-KR. */

            /* The whole sequence is 8 bytes; the 2 bytes requested above
             * are not enough for this branch. */
            REQUIRE_OUTBUF(8);

            /* syllable composition precedence */
            OUTBYTE1(EUCKR_JAMO_FIRSTBYTE);
            OUTBYTE2(EUCKR_JAMO_FILLER);

            /* All code points in CP949 extension are in unicode
             * Hangul Syllable area. */
            assert(0xac00 <= c && c <= 0xd7a3);
            /* From here on c is the syllable offset (0 .. 0x2ba3). */
            c -= 0xac00;

            OUTBYTE3(EUCKR_JAMO_FIRSTBYTE);
            OUTBYTE4(u2cgk_choseong[c / 588]);
            /* Only OUTBYTE1..4 are used in this file, so the sequence is
             * emitted as two groups of four: advance the output by 4 here
             * (input not yet consumed), then finish with NEXT(1, 4). */
            NEXT_OUT(4);

            OUTBYTE1(EUCKR_JAMO_FIRSTBYTE);
            OUTBYTE2(u2cgk_jungseong[(c / 28) % 21]);
            OUTBYTE3(EUCKR_JAMO_FIRSTBYTE);
            OUTBYTE4(u2cgk_jongseong[c % 28]);
            NEXT(1, 4);
        }
    }

    return 0;
}

/* Sentinel stored in the cgk2u_* tables (and assigned directly by the
 * decoder) for a byte that is not a valid component; it is larger than any
 * real index (max 27).  #undef'd after the decoder. */
#define NONE    127

/*
 * Decoder-side tables, indexed by (byte - 0xa1) for bytes 0xa1..0xbe.
 * They are the inverse of u2cgk_choseong / u2cgk_jongseong above.  The
 * medial component needs no table: the decoder computes it as byte - 0xbf.
 */
static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
       0,    1, NONE,    2, NONE, NONE,    3,    4,
       5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
       6,    7,    8, NONE,    9,   10,   11,   12,
      13,   14,   15,   16,   17,   18
};
static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
       1,    2,    3,    4,    5,    6,    7, NONE,
       8,    9,   10,   11,   12,   13,   14,   15,
      16,   17, NONE,   18,   19,   20,   21,   22,
    NONE,   23,   24,   25,   26,   27
};

/*
 * EUC-KR decoder.
 *
 * Per input unit:
 *   - a byte < 0x80 is output as-is;
 *   - the pair A4 D4 starts an 8-byte make-up sequence, which is validated
 *     and composed into one syllable 0xac00 + cho*588 + jung*28 + jong;
 *   - any other pair is looked up in the ksx1001 decode map after XOR-ing
 *     both bytes with 0x80;
 *   - anything else ends the function with "return 1".
 */
DECODER(euc_kr)
{
    while (inleft > 0) {
        unsigned char c = INBYTE1;
        Py_UCS4 decoded;

        if (c < 0x80) {
            OUTCHAR(c);
            NEXT_IN(1);
            continue;
        }

        REQUIRE_INBUF(2);

        if (c == EUCKR_JAMO_FIRSTBYTE &&
            INBYTE2 == EUCKR_JAMO_FILLER) {
            /* KS X 1001:1998 Annex 3 make-up sequence */
            DBCHAR cho, jung, jong;

            REQUIRE_INBUF(8);
            /* Bytes 2, 4 and 6 must all be the jamo lead byte. */
            if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
                (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
                (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
                return 1;

            /* c is reused below as a scratch variable for each
             * component byte. */
            c = (*inbuf)[3];
            if (0xa1 <= c && c <= 0xbe)
                cho = cgk2u_choseong[c - 0xa1];
            else
                cho = NONE;

            c = (*inbuf)[5];
            jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;

            c = (*inbuf)[7];
            if (c == EUCKR_JAMO_FILLER)
                jong = 0;
            else if (0xa1 <= c && c <= 0xbe)
                jong = cgk2u_jongseong[c - 0xa1];
            else
                jong = NONE;

            if (cho == NONE || jung == NONE || jong == NONE)
                return 1;

            OUTCHAR(0xac00 + cho*588 + jung*28 + jong);
            NEXT_IN(8);
        }
        else if (TRYMAP_DEC(ksx1001, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) {
            OUTCHAR(decoded);
            NEXT_IN(2);
        }
        else
            return 1;
    }

    return 0;
}
#undef NONE


/*
 * CP949 codec
 */

/*
 * CP949 encoder.
 *
 * Uses the same cp949 encode map as the EUC-KR encoder, but always writes
 * exactly two bytes for a mapped non-ASCII character: the first byte is the
 * high byte of the code with 0x80 OR-ed in; the second byte is the low byte
 * unchanged when bit 0x8000 of the code is set, or with 0x80 OR-ed in when
 * it is clear.
 */
ENCODER(cp949)
{
    while (*inpos < inlen) {
        Py_UCS4 c = INCHAR1;
        DBCHAR code;

        if (c < 0x80) {
            WRITEBYTE1((unsigned char)c);
            NEXT(1, 1);
            continue;
        }

        if (c > 0xFFFF)
            return 1;

        REQUIRE_OUTBUF(2);
        if (TRYMAP_ENC(cp949, code, c))
            ;
        else
            return 1;

        OUTBYTE1((code >> 8) | 0x80);
        if (code & 0x8000)
            OUTBYTE2(code & 0xFF); /* MSB set: CP949 */
        else
            OUTBYTE2((code & 0xFF) | 0x80); /* MSB unset: ks x 1001 */
        NEXT(1, 2);
    }

    return 0;
}

/*
 * CP949 decoder.
 *
 * A byte < 0x80 is output as-is.  Otherwise two bytes are consumed: the
 * ksx1001 decode map is tried first (both bytes XOR 0x80), then the
 * cp949ext decode map (raw bytes); if neither matches the function ends
 * with "return 1".
 */
DECODER(cp949)
{
    while (inleft > 0) {
        unsigned char c = INBYTE1;
        Py_UCS4 decoded;

        if (c < 0x80) {
            OUTCHAR(c);
            NEXT_IN(1);
            continue;
        }

        REQUIRE_INBUF(2);
        if (TRYMAP_DEC(ksx1001, decoded, c ^ 0x80, INBYTE2 ^ 0x80))
            OUTCHAR(decoded);
        else if (TRYMAP_DEC(cp949ext, decoded, c, INBYTE2))
            OUTCHAR(decoded);
        else
            return 1;

        NEXT_IN(2);
    }

    return 0;
}


/*
 * JOHAB codec
 */

/*
 * Encoder-side tables: index of an initial / medial / final component
 * (computed from the syllable offset exactly as in the EUC-KR encoder)
 * -> 5-bit field value placed in the 16-bit JOHAB code.
 * The arrays are declared with 32 elements but only the first 19 / 21 / 28
 * are initialised and only those are ever indexed; the rest are implicitly
 * zero.
 */
static const unsigned char u2johabidx_choseong[32] = {
                0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14,
};
static const unsigned char u2johabidx_jungseong[32] = {
                      0x03, 0x04, 0x05, 0x06, 0x07,
                0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
                0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
                0x1a, 0x1b, 0x1c, 0x1d,
};
static const unsigned char u2johabidx_jongseong[32] = {
          0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11,       0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
/* Complete two-byte JOHAB codes for U+3131..U+3163, indexed by
 * (c - 0x3131); 51 entries. */
static const DBCHAR u2johabjamo[] = {
            0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
    0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
    0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
    0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
    0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
    0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
    0x8741, 0x8761, 0x8781, 0x87a1,
};

/*
 * JOHAB encoder.
 *
 * Per input character:
 *   - c < 0x80 is written as a single byte; c > 0xFFFF -> "return 1";
 *   - U+AC00..U+D7A3 is packed arithmetically into
 *         0x8000 | initial << 10 | medial << 5 | final
 *     using the u2johabidx_* tables;
 *   - U+3131..U+3163 is looked up in u2johabjamo;
 *   - anything else goes through the cp949 encode map and, if the result
 *     lies in the accepted row/cell ranges, is re-packed into two JOHAB
 *     bytes; otherwise "return 1".
 */
ENCODER(johab)
{
    while (*inpos < inlen) {
        Py_UCS4 c = INCHAR1;
        DBCHAR code;

        if (c < 0x80) {
            WRITEBYTE1((unsigned char)c);
            NEXT(1, 1);
            continue;
        }

        if (c > 0xFFFF)
            return 1;

        REQUIRE_OUTBUF(2);

        if (c >= 0xac00 && c <= 0xd7a3) {
            c -= 0xac00;
            code = 0x8000 |
                   (u2johabidx_choseong[c / 588] << 10) |
                   (u2johabidx_jungseong[(c / 28) % 21] << 5) |
                   u2johabidx_jongseong[c % 28];
        }
        else if (c >= 0x3131 && c <= 0x3163)
            code = u2johabjamo[c - 0x3131];
        else if (TRYMAP_ENC(cp949, code, c)) {
            unsigned char c1, c2, t2;
            unsigned short t1;

            /* Codes with bit 0x8000 set are not expected on this path:
             * the syllable range was already handled above. */
            assert((code & 0x8000) == 0);
            c1 = code >> 8;
            c2 = code & 0xff;
            /* Only rows 0x21..0x2c and 0x4a..0x7d, cells 0x21..0x7e,
             * are representable here. */
            if (((c1 >= 0x21 && c1 <= 0x2c) ||
                 (c1 >= 0x4a && c1 <= 0x7d)) &&
                (c2 >= 0x21 && c2 <= 0x7e)) {
                /* Two source rows share one JOHAB lead byte: t1 >> 1 is
                 * the lead byte and the low bit of t1 selects which half
                 * of the trail-byte range (offset 0 or 0x5e) is used. */
                t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
                                  (c1 - 0x21 + 0x197));
                t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
                OUTBYTE1(t1 >> 1);
                /* Trail bytes are 0x31..0x7e, then 0x91 upward. */
                OUTBYTE2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43);
                NEXT(1, 2);
                continue;
            }
            else
                return 1;
        }
        else
            return 1;

        /* Reached only from the syllable and jamo branches, which leave a
         * ready-made 16-bit code in "code". */
        OUTBYTE1(code >> 8);
        OUTBYTE2(code & 0xff);
        NEXT(1, 2);
    }

    return 0;
}

/* Markers used in the decoder tables below; both are #undef'd after the
 * decoder.  FILL marks the field value meaning "component absent", NONE
 * marks a field value that is not valid. */
#define FILL 0xfd
#define NONE 0xff

/*
 * Decoder-side tables, indexed by a 5-bit field extracted from the two
 * input bytes.  johabidx_* give the component index used in the syllable
 * formula; they are the inverse of the u2johabidx_* tables above.
 */
static const unsigned char johabidx_choseong[32] = {
    NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
    0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
    NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
    NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
    NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
    NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
    NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
    0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};

/*
 * johabjamo_* are indexed by the same 5-bit fields and give the low byte
 * that the decoder ORs with 0x3100 when only one component is present.
 * Their NONE/FILL slots are never read for output: the decoder rejects
 * NONE fields first and handles FILL combinations separately.
 */
static const unsigned char johabjamo_choseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
    0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
    NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
    NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
    0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};

/*
 * JOHAB decoder.
 *
 * A byte < 0x80 is output as-is.  Otherwise two bytes (c, c2) are consumed:
 *   - c < 0xd8: the pair is split into three 5-bit fields and decoded as a
 *     syllable, a single jamo (0x3100 | table byte), or U+3000 when all
 *     three fields are FILL;
 *   - c >= 0xd8: the pair is validated, converted to a (row, cell) pair and
 *     looked up in the ksx1001 decode map.
 * Invalid input ends the function with "return 1".
 */
DECODER(johab)
{
    while (inleft > 0) {
        unsigned char c = INBYTE1, c2;
        Py_UCS4 decoded;

        if (c < 0x80) {
            OUTCHAR(c);
            NEXT_IN(1);
            continue;
        }

        REQUIRE_INBUF(2);
        c2 = INBYTE2;

        if (c < 0xd8) {
            /* johab hangul */
            unsigned char c_cho, c_jung, c_jong;
            unsigned char i_cho, i_jung, i_jong;

            /* Bit layout of (c << 8 | c2): 1 ccccc jjjjj fffff.
             * The medial field straddles the two bytes. */
            c_cho = (c >> 2) & 0x1f;
            c_jung = ((c << 3) | c2 >> 5) & 0x1f;
            c_jong = c2 & 0x1f;

            i_cho = johabidx_choseong[c_cho];
            i_jung = johabidx_jungseong[c_jung];
            i_jong = johabidx_jongseong[c_jong];

            if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
                return 1;

            /* we don't use U+1100 hangul jamo yet. */
            /* Accepted combinations: all three absent, exactly one
             * component present, or initial + medial present (final
             * optional).  Every other mix of FILL and real components is
             * rejected. */
            if (i_cho == FILL) {
                if (i_jung == FILL) {
                    if (i_jong == FILL)
                        OUTCHAR(0x3000);
                    else
                        OUTCHAR(0x3100 |
                            johabjamo_jongseong[c_jong]);
                }
                else {
                    if (i_jong == FILL)
                        OUTCHAR(0x3100 |
                            johabjamo_jungseong[c_jung]);
                    else
                        return 1;
                }
            } else {
                if (i_jung == FILL) {
                    if (i_jong == FILL)
                        OUTCHAR(0x3100 |
                            johabjamo_choseong[c_cho]);
                    else
                        return 1;
                }
                else
                    OUTCHAR(0xac00 +
                        i_cho * 588 +
                        i_jung * 28 +
                        (i_jong == FILL ? 0 : i_jong));
            }
            NEXT_IN(2);
        } else {
            /* KS X 1001 except hangul jamos and syllables */
            /* The last clause rejects lead 0xda with trail 0xa1..0xd3,
             * which the arithmetic below would turn into row 0x24,
             * cells 0x21..0x53 -- the row that EUC-KR addresses with
             * lead byte 0xA4 (EUCKR_JAMO_FIRSTBYTE). */
            if (c == 0xdf || c > 0xf9 ||
                c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
                (c2 & 0x7f) == 0x7f ||
                (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
                return 1;
            else {
                unsigned char t1, t2;

                /* Inverse of the packing done in ENCODER(johab):
                 * undo the trail-byte gap, then recover row (t1) and
                 * cell (t2), both based at 0x21. */
                t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
                         2 * c - 0x197);
                t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
                t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
                t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;

                if (TRYMAP_DEC(ksx1001, decoded, t1, t2)) {
                    OUTCHAR(decoded);
                    NEXT_IN(2);
                }
                else {
                    return 1;
                }
            }
        }
    }

    return 0;
}
#undef NONE
#undef FILL


/* Mapping tables referenced above: ksx1001 and cp949ext are only used with
 * TRYMAP_DEC in this file, cp949 only with TRYMAP_ENC. */
BEGIN_MAPPINGS_LIST
  MAPPING_DECONLY(ksx1001)
  MAPPING_ENCONLY(cp949)
  MAPPING_DECONLY(cp949ext)
END_MAPPINGS_LIST

/* One entry per ENCODER/DECODER pair defined in this file. */
BEGIN_CODECS_LIST
  CODEC_STATELESS(euc_kr)
  CODEC_STATELESS(cp949)
  CODEC_STATELESS(johab)
END_CODECS_LIST

I_AM_A_MODULE_FOR(kr)