12bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang/* 22bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang * cjkcodecs.h: common header for cjkcodecs 32bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang * 42bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang * Written by Hye-Shik Chang <perky@FreeBSD.org> 52bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang */ 62bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 72bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#ifndef _CJKCODECS_H_ 82bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#define _CJKCODECS_H_ 92bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 104b96c1384e008218bdfeb9e271a094b1ab8484d3Hye-Shik Chang#define PY_SSIZE_T_CLEAN 112bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#include "Python.h" 122bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#include "multibytecodec.h" 132bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 142bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 15e8c9e14af9bd9f2ced50e8b6535e45f0d33d8e72Serhiy Storchaka/* a unicode "undefined" code point */ 16c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define UNIINV 0xFFFE 17331649acc7479f6e10cf6f6d01118d90f58ae600Hye-Shik Chang 18e8c9e14af9bd9f2ced50e8b6535e45f0d33d8e72Serhiy Storchaka/* internal-use DBCS code points which aren't used by any charsets */ 19c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define NOCHAR 0xFFFF 20c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define MULTIC 0xFFFE 21c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define DBCINV 0xFFFD 222bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 232bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang/* shorter macros to save source size of mapping tables */ 242bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#define U UNIINV 252bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#define N NOCHAR 262bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#define M MULTIC 
272bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#define D DBCINV 282bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 292bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstruct dbcs_index { 30c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const ucs2_t *map; 31c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou unsigned char bottom, top; 322bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang}; 332bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changtypedef struct dbcs_index decode_map; 342bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 352bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstruct widedbcs_index { 36c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const ucs4_t *map; 37c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou unsigned char bottom, top; 382bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang}; 392bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changtypedef struct widedbcs_index widedecode_map; 402bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 412bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstruct unim_index { 42c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const DBCHAR *map; 43c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou unsigned char bottom, top; 442bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang}; 452bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changtypedef struct unim_index encode_map; 462bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 472bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstruct unim_index_bytebased { 48c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const unsigned char *map; 49c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou unsigned char bottom, top; 502bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang}; 512bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 522bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstruct dbcs_map { 53c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const char 
*charset; 54c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const struct unim_index *encmap; 55c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const struct dbcs_index *decmap; 562bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang}; 572bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 582bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstruct pair_encodemap { 59c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou ucs4_t uniseq; 60c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou DBCHAR code; 612bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang}; 622bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 6364a9e38f35e2889ed1c076c820a808f827a5e334Hye-Shik Changstatic const MultibyteCodec *codec_list; 6464a9e38f35e2889ed1c076c820a808f827a5e334Hye-Shik Changstatic const struct dbcs_map *mapping_list; 652bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 66c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define CODEC_INIT(encoding) \ 67c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou static int encoding##_codec_init(const void *config) 68c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 69c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define ENCODER_INIT(encoding) \ 70c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou static int encoding##_encode_init( \ 71c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou MultibyteCodec_State *state, const void *config) 72c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define ENCODER(encoding) \ 73c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou static Py_ssize_t encoding##_encode( \ 74c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou MultibyteCodec_State *state, const void *config, \ 75c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const Py_UNICODE **inbuf, Py_ssize_t inleft, \ 76c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou unsigned char **outbuf, Py_ssize_t outleft, int flags) 77c83ea137d7e717f764e2f31fc2544f522de7d857Antoine 
#define ENCODER_RESET(encoding)                                         \
    static Py_ssize_t encoding##_encode_reset(                          \
        MultibyteCodec_State *state, const void *config,                \
        unsigned char **outbuf, Py_ssize_t outleft)

#define DECODER_INIT(encoding)                                          \
    static int encoding##_decode_init(                                  \
        MultibyteCodec_State *state, const void *config)
#define DECODER(encoding)                                               \
    static Py_ssize_t encoding##_decode(                                \
        MultibyteCodec_State *state, const void *config,                \
        const unsigned char **inbuf, Py_ssize_t inleft,                 \
        Py_UNICODE **outbuf, Py_ssize_t outleft)
#define DECODER_RESET(encoding)                                         \
    static Py_ssize_t encoding##_decode_reset(                          \
        MultibyteCodec_State *state, const void *config)

/*
 * NOTE on the statement macros below (UCS4INVALID, NEXT*, REQUIRE_*,
 * OUT*, WRITE*, WRITEUCS4): they expand to one or more bare statements
 * that already end in ';' and they refer to the enclosing function's
 * `inbuf`, `inleft`, `outbuf` and `outleft` by name.  They are meant to be
 * written without a trailing semicolon (NEXT() itself chains two of them
 * that way).  Because several expand to more than one statement, do not
 * use them as the sole body of an unbraced if/else/loop.
 */

/* On builds with 4-byte Py_UNICODE: return 1 from the enclosing function
 * when `code` is above 0xFFFF.  On other builds: no-op. */
#if Py_UNICODE_SIZE == 4
#define UCS4INVALID(code)                       \
    if ((code) > 0xFFFF)                        \
        return 1;
#else
#define UCS4INVALID(code)                       \
    if (0) ;
#endif

/* advance the input and/or output cursor and shrink the remaining count */
#define NEXT_IN(i)                              \
    (*inbuf) += (i);                            \
    (inleft) -= (i);
#define NEXT_OUT(o)                             \
    (*outbuf) += (o);                           \
    (outleft) -= (o);
#define NEXT(i, o)                              \
    NEXT_IN(i) NEXT_OUT(o)

/* return MBERR_TOOFEW / MBERR_TOOSMALL from the enclosing function when
 * fewer than n input / output units remain */
#define REQUIRE_INBUF(n)                        \
    if (inleft < (n))                           \
        return MBERR_TOOFEW;
#define REQUIRE_OUTBUF(n)                       \
    if (outleft < (n))                          \
        return MBERR_TOOSMALL;

/* unchecked reads of the next input units */
#define IN1 ((*inbuf)[0])
#define IN2 ((*inbuf)[1])
#define IN3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3])

/* unchecked stores to the next output units (no space check, no advance) */
#define OUT1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c);
#define OUT4(c) ((*outbuf)[3]) = (c);

/* checked stores: REQUIRE_OUTBUF(k) first, then store k units; the output
 * cursor is NOT advanced */
#define WRITE1(c1)                              \
    REQUIRE_OUTBUF(1)                           \
    (*outbuf)[0] = (c1);
#define WRITE2(c1, c2)                          \
    REQUIRE_OUTBUF(2)                           \
    (*outbuf)[0] = (c1);                        \
    (*outbuf)[1] = (c2);
#define WRITE3(c1, c2, c3)                      \
    REQUIRE_OUTBUF(3)                           \
    (*outbuf)[0] = (c1);                        \
    (*outbuf)[1] = (c2);                        \
    (*outbuf)[2] = (c3);
#define WRITE4(c1, c2, c3, c4)                  \
    REQUIRE_OUTBUF(4)                           \
    (*outbuf)[0] = (c1);                        \
    (*outbuf)[1] = (c2);                        \
    (*outbuf)[2] = (c3);                        \
    (*outbuf)[3] = (c4);

/* Store one code point and advance the output cursor.  With 2-byte
 * Py_UNICODE the value is always split into a surrogate pair, so this
 * variant assumes c >= 0x10000 -- TODO confirm against callers. */
#if Py_UNICODE_SIZE == 2
# define WRITEUCS4(c)                                           \
    REQUIRE_OUTBUF(2)                                           \
    (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10);            \
    (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff);          \
    NEXT_OUT(2)
#else
# define WRITEUCS4(c)                                           \
    REQUIRE_OUTBUF(1)                                           \
    **outbuf = (Py_UNICODE)(c);                                 \
    NEXT_OUT(1)
#endif

/*
 * Table lookups.  `m` is a pointer to a row descriptor (map/bottom/top).
 * Each _TRYMAP_* macro is a parenthesized expression that is true when the
 * row exists, `val` lies in [bottom, top], and the cell is not the
 * "no mapping" marker.  The cell value is assigned to `assi` as a side
 * effect whenever the range check passes, even if the overall result is
 * false.  `m` and `val` are evaluated more than once.
 *
 * The TRYMAP_* forms (no leading underscore) expand to `if (<lookup>)`
 * with no body: the caller supplies the statement that follows.
 */
#define _TRYMAP_ENC(m, assi, val)                               \
    ((m)->map != NULL && (val) >= (m)->bottom &&                \
        (val)<= (m)->top && ((assi) = (m)->map[(val) -          \
        (m)->bottom]) != NOCHAR)
/* row is selected by the high byte of `uni`, cell by the low byte */
#define TRYMAP_ENC_COND(charset, assi, uni)                     \
    _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
#define TRYMAP_ENC(charset, assi, uni)                          \
    if TRYMAP_ENC_COND(charset, assi, uni)

#define _TRYMAP_DEC(m, assi, val)                               \
    ((m)->map != NULL && (val) >= (m)->bottom &&                \
        (val)<= (m)->top && ((assi) = (m)->map[(val) -          \
        (m)->bottom]) != UNIINV)
/* row is selected by c1, cell by c2 */
#define TRYMAP_DEC(charset, assi, c1, c2)                       \
    if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)

/* Multi-plane encode lookup: every cell occupies three consecutive map
 * entries (plane, high, low); a plane entry of 0 means "no mapping". */
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val)      \
    ((m)->map != NULL && (val) >= (m)->bottom &&                \
        (val)<= (m)->top &&                                     \
        ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
        (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
        (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
    if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8],        \
                          assplane, asshi, asslo, (uni) & 0xff)
#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2)         \
    if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)

/*
 * DECODE_SURROGATE(c): on builds with 2-byte Py_UNICODE, if `c` is a high
 * surrogate, require a second input unit (returning MBERR_TOOFEW from the
 * enclosing function otherwise) and, when that unit is a low surrogate,
 * replace `c` with the combined code point.  `c` must be a modifiable
 * lvalue.  On other builds it is an empty block.
 *
 * GET_INSIZE(c): number of input units a code point occupies.
 */
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c)                                     \
    if ((c) >> 10 == 0xd800 >> 10) { /* high surrogate */       \
        REQUIRE_INBUF(2)                                        \
        if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */    \
            (c) = 0x10000 + ((ucs4_t)((c) - 0xd800) << 10) +    \
                ((ucs4_t)(IN2) - 0xdc00);                       \
        }                                                       \
    }
#define GET_INSIZE(c)   ((c) > 0xffff ? 2 : 1)
#else
#define DECODE_SURROGATE(c) {;}
#define GET_INSIZE(c)   1
#endif

/*
 * Builders for the per-module table of exported mappings.  Usage is
 * BEGIN_MAPPINGS_LIST, any number of MAPPING_* entries, END_MAPPINGS_LIST.
 * The list is terminated by an entry whose charset is "" and
 * `mapping_list` is pointed at the array.
 */
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
#define END_MAPPINGS_LIST                               \
    {"", NULL, NULL} };                                 \
    static const struct dbcs_map *mapping_list =        \
        (const struct dbcs_map *)_mapping_list;

/* opens the per-module codec table; closed by END_CODECS_LIST */
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
Pitrou#define _STATEFUL_METHODS(enc) \ 215c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou enc##_encode, \ 216c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou enc##_encode_init, \ 217c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou enc##_encode_reset, \ 218c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou enc##_decode, \ 219c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou enc##_decode_init, \ 220c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou enc##_decode_reset, 221c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define _STATELESS_METHODS(enc) \ 222c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou enc##_encode, NULL, NULL, \ 223c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou enc##_decode, NULL, NULL, 224c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define CODEC_STATEFUL(enc) { \ 225c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou #enc, NULL, NULL, \ 226c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou _STATEFUL_METHODS(enc) \ 2272bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang}, 228c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define CODEC_STATELESS(enc) { \ 229c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou #enc, NULL, NULL, \ 230c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou _STATELESS_METHODS(enc) \ 2312bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang}, 232c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define CODEC_STATELESS_WINIT(enc) { \ 233c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou #enc, NULL, \ 234c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou enc##_codec_init, \ 235c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou _STATELESS_METHODS(enc) \ 2362bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang}, 237c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define END_CODECS_LIST \ 238c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou {"", NULL,} }; \ 239c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou static 
const MultibyteCodec *codec_list = \ 240c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou (const MultibyteCodec *)_codec_list; 2412bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 2422bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstatic PyObject * 2432bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changgetmultibytecodec(void) 2442bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang{ 245c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou static PyObject *cofunc = NULL; 246c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 247c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (cofunc == NULL) { 248c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec"); 249c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (mod == NULL) 250c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return NULL; 251c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou cofunc = PyObject_GetAttrString(mod, "__create_codec"); 252c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou Py_DECREF(mod); 253c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou } 254c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return cofunc; 2552bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang} 2562bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 2572bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstatic PyObject * 2582bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changgetcodec(PyObject *self, PyObject *encoding) 2592bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang{ 260c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou PyObject *codecobj, *r, *cofunc; 261c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const MultibyteCodec *codec; 262c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const char *enc; 263c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 264c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (!PyString_Check(encoding)) { 
265c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou PyErr_SetString(PyExc_TypeError, 266c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou "encoding name must be a string."); 267c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return NULL; 268c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou } 269c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 270c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou cofunc = getmultibytecodec(); 271c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (cofunc == NULL) 272c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return NULL; 273c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 274c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou enc = PyString_AS_STRING(encoding); 275c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou for (codec = codec_list; codec->encoding[0]; codec++) 276c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (strcmp(codec->encoding, enc) == 0) 277c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou break; 278c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 279c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (codec->encoding[0] == '\0') { 280c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou PyErr_SetString(PyExc_LookupError, 281c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou "no such codec is supported."); 282c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return NULL; 283c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou } 284c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 285c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL); 286c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (codecobj == NULL) 287c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return NULL; 288c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 289c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou r = 
PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL); 290c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou Py_DECREF(codecobj); 291c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 292c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return r; 2932bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang} 2942bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 2952bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstatic struct PyMethodDef __methods[] = { 296c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou {"getcodec", (PyCFunction)getcodec, METH_O, ""}, 297c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou {NULL, NULL}, 2982bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang}; 2992bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 3002bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstatic int 3012bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changregister_maps(PyObject *module) 3022bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang{ 303c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const struct dbcs_map *h; 304c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 305c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou for (h = mapping_list; h->charset[0] != '\0'; h++) { 306c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou char mhname[256] = "__map_"; 307c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou int r; 308c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou strcpy(mhname + sizeof("__map_") - 1, h->charset); 309c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou r = PyModule_AddObject(module, mhname, 310c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL)); 311c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (r == -1) 312c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return -1; 313c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou } 314c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return 0; 
3152bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang} 3162bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 3172bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#ifdef USING_BINARY_PAIR_SEARCH 3182bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstatic DBCHAR 3192bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changfind_pairencmap(ucs2_t body, ucs2_t modifier, 320c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const struct pair_encodemap *haystack, int haystacksize) 3212bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang{ 322c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou int pos, min, max; 323c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou ucs4_t value = body << 16 | modifier; 324c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 325c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou min = 0; 326c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou max = haystacksize; 327c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 32859dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) { 329c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (value < haystack[pos].uniseq) { 33059dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson if (max != pos) { 33159dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson max = pos; 33259dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson continue; 33359dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson } 334c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou } 335c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou else if (value > haystack[pos].uniseq) { 33659dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson if (min != pos) { 33759dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson min = pos; 33859dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson continue; 33959dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson } 340c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou } 
34159dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson break; 34259dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson } 343c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 34459dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson if (value == haystack[pos].uniseq) { 34559dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson return haystack[pos].code; 34659dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson } 34759dd0e64ad24b0bf35f4900bfe6b1d0f49844ca3Benjamin Peterson return DBCINV; 3482bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang} 3492bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#endif 3502bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 3512bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#ifdef USING_IMPORTED_MAPS 3522bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#define IMPORT_MAP(locale, charset, encmap, decmap) \ 353c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou importmap("_codecs_" #locale, "__map_" #charset, \ 354c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou (const void**)encmap, (const void**)decmap) 3552bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 3562bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changstatic int 3572bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changimportmap(const char *modname, const char *symbol, 358c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou const void **encmap, const void **decmap) 3592bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang{ 360c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou PyObject *o, *mod; 361c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 362c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou mod = PyImport_ImportModule((char *)modname); 363c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (mod == NULL) 364c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return -1; 365c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 366c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou o = 
PyObject_GetAttrString(mod, (char*)symbol); 367c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (o == NULL) 368c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou goto errorexit; 369c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) { 370c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou PyErr_SetString(PyExc_ValueError, 371c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou "map data must be a Capsule."); 372c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou goto errorexit; 373c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou } 374c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou else { 375c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou struct dbcs_map *map; 376c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME); 377c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (encmap != NULL) 378c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou *encmap = map->encmap; 379c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (decmap != NULL) 380c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou *decmap = map->decmap; 381c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou Py_DECREF(o); 382c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou } 383c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou 384c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou Py_DECREF(mod); 385c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return 0; 3862bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 3872bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Changerrorexit: 388c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou Py_DECREF(mod); 389c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou return -1; 3902bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang} 3912bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#endif 3922bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 
393c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou#define I_AM_A_MODULE_FOR(loc) \ 394c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou void \ 395c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou init_codecs_##loc(void) \ 396c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou { \ 397c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou PyObject *m = Py_InitModule("_codecs_" #loc, __methods);\ 398c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou if (m != NULL) \ 399c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou (void)register_maps(m); \ 400c83ea137d7e717f764e2f31fc2544f522de7d857Antoine Pitrou } 4012bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang 4022bb146f2f4fd52b03cfa7ae739adb35d2b9f5421Hye-Shik Chang#endif 403