/*
 * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
163f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
173f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod#include <stdio.h>
183f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod#include <stdlib.h>
193f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod#include "ucdn.h"
203f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
/*
 * Property record for a code point, as stored in the ucd_records table.
 * NOTE(review): the generated tables presumably initialize this struct
 * positionally, so the field order must not change -- confirm against
 * unicodedata_db.h.
 */
typedef struct {
    unsigned char category;             /* read by ucdn_get_general_category() */
    unsigned char combining;            /* read by ucdn_get_combining_class() */
    unsigned char bidi_class;           /* read by ucdn_get_bidi_class() */
    unsigned char mirrored;             /* non-zero when the code point mirrors */
    unsigned char east_asian_width;     /* read by ucdn_get_east_asian_width() */
    unsigned char normalization_check;  /* not read anywhere in this file */
    unsigned char script;               /* read by ucdn_get_script() */
} UCDRecord;
303f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
/* One entry of the mirror_pairs table: a code point and its mirror image. */
typedef struct {
    unsigned short from;    /* lookup key used by compare_mp() */
    unsigned short to;      /* value returned by ucdn_mirror() */
} MirrorPair;
343f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
/*
 * Maps a run of consecutive code points onto consecutive composition
 * indices; see get_comp_index() for how the fields are interpreted.
 */
typedef struct {
    unsigned int start;     /* first code point of the run; 0 ends a table */
    short count;            /* run covers start .. start + count inclusive */
    short index;            /* composition index assigned to `start` */
} Reindex;
393f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
403f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod#include "unicodedata_db.h"
413f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
/* Constants required for algorithmic Hangul (de)composition. */

/* Base code points. */
#define SBASE 0xAC00    /* first precomposed syllable */
#define LBASE 0x1100    /* first leading consonant */
#define VBASE 0x1161    /* first vowel */
#define TBASE 0x11A7    /* one below the first trailing consonant */

/* Range sizes. */
#define LCOUNT 19
#define VCOUNT 21
#define TCOUNT 28
#define NCOUNT (VCOUNT * TCOUNT)    /* syllables per leading consonant */
#define SCOUNT (LCOUNT * NCOUNT)    /* 11172 precomposed syllables */
523f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
535be86b1bb4fbb37b50a1e2798df0c9a3a528b6b2Behdad Esfahbodstatic const UCDRecord *get_ucd_record(uint32_t code)
543f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
553f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    int index, offset;
563f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
573f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if (code >= 0x110000)
583f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        index = 0;
593f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    else {
603f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        index  = index0[code >> (SHIFT1+SHIFT2)] << SHIFT1;
613f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        offset = (code >> SHIFT2) & ((1<<SHIFT1) - 1);
623f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        index  = index1[index + offset] << SHIFT2;
633f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        offset = code & ((1<<SHIFT2) - 1);
643f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        index  = index2[index + offset];
653f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    }
663f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
673f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return &ucd_records[index];
683f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
693f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
705be86b1bb4fbb37b50a1e2798df0c9a3a528b6b2Behdad Esfahbodstatic const unsigned short *get_decomp_record(uint32_t code)
713f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
723f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    int index, offset;
733f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
743f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if (code >= 0x110000)
753f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        index = 0;
763f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    else {
773f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        index  = decomp_index0[code >> (DECOMP_SHIFT1+DECOMP_SHIFT2)]
783f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod            << DECOMP_SHIFT1;
793f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        offset = (code >> DECOMP_SHIFT2) & ((1<<DECOMP_SHIFT1) - 1);
803f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        index  = decomp_index1[index + offset] << DECOMP_SHIFT2;
813f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        offset = code & ((1<<DECOMP_SHIFT2) - 1);
823f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        index  = decomp_index2[index + offset];
833f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    }
843f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
853f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return &decomp_data[index];
863f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
873f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
88b5e04c7dc6b78e311d1a14f1f808fac76a64c889Behdad Esfahbodstatic int get_comp_index(uint32_t code, const Reindex *idx)
893f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
903f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    int i;
913f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
923f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    for (i = 0; idx[i].start; i++) {
935be86b1bb4fbb37b50a1e2798df0c9a3a528b6b2Behdad Esfahbod        const Reindex *cur = &idx[i];
943f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        if (code < cur->start)
953f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod            return -1;
963f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        if (code <= cur->start + cur->count) {
973f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod            return cur->index + (code - cur->start);
983f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        }
993f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    }
1003f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1013f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return -1;
1023f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
1033f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1043f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodstatic int compare_mp(const void *a, const void *b)
1053f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
1063f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    MirrorPair *mpa = (MirrorPair *)a;
1073f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    MirrorPair *mpb = (MirrorPair *)b;
1083f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return mpa->from - mpb->from;
1093f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
1103f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1113f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodstatic int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b)
1123f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
1133f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    int si = code - SBASE;
1143f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1153f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if (si < 0 || si >= SCOUNT)
1163f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return 0;
1173f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1183f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if (si % TCOUNT) {
1193f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        /* LV,T */
1203f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        *a = SBASE + (si / TCOUNT) * TCOUNT;
1213f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        *b = TBASE + (si % TCOUNT);
1223f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return 3;
1233f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    } else {
1243f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        /* L,V */
1253f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        *a = LBASE + (si / NCOUNT);
1263f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        *b = VBASE + (si % NCOUNT) / TCOUNT;
1273f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return 2;
1283f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    }
1293f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
1303f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1313f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodstatic int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b)
1323f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
1333f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if (b < VBASE || b >= (TBASE + TCOUNT))
1343f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return 0;
1353f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1363f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if ((a < LBASE || a >= (LBASE + LCOUNT))
1373f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod            && (a < SBASE || a >= (SBASE + SCOUNT)))
1383f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return 0;
1393f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1403f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if (a >= SBASE) {
1413f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        /* LV,T */
1423f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        *code = a + (b - TBASE);
1433f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return 3;
1443f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    } else {
1453f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        /* L,V */
1463f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        int li = a - LBASE;
1473f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        int vi = b - VBASE;
1483f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        *code = SBASE + li * NCOUNT + vi * TCOUNT;
1493f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return 2;
1503f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    }
1513f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
1523f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
/*
 * Decode one code point from a UTF-16 sequence and advance *code_ptr past
 * the code units consumed.
 *
 * A unit in the high-surrogate range 0xD800..0xDBFF is combined with the
 * unit that follows it; every other unit is returned unchanged as a single
 * code point. The surrogate test must mask with 0xFC00: testing only the
 * 0xD800 bits would also match ordinary BMP units such as 0xF900..0xFFFF
 * and wrongly consume two units for them.
 */
static uint32_t decode_utf16(const unsigned short **code_ptr)
{
    const unsigned short *code = *code_ptr;
    uint32_t lead = code[0];
    uint32_t trail;

    if ((lead & 0xfc00) != 0xd800) {
        *code_ptr += 1;
        return lead;
    }

    trail = code[1];
    *code_ptr += 2;
    return 0x10000 + (trail - 0xdc00) + ((lead - 0xd800) << 10);
}
1663f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1673f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodconst char *ucdn_get_unicode_version(void)
1683f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
1693f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return UNIDATA_VERSION;
1703f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
1713f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1723f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodint ucdn_get_combining_class(uint32_t code)
1733f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
1743f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return get_ucd_record(code)->combining;
1753f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
1763f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1773f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodint ucdn_get_east_asian_width(uint32_t code)
1783f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
1793f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return get_ucd_record(code)->east_asian_width;
1803f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
1813f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1823f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodint ucdn_get_general_category(uint32_t code)
1833f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
1843f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return get_ucd_record(code)->category;
1853f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
1863f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1873f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodint ucdn_get_bidi_class(uint32_t code)
1883f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
1893f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return get_ucd_record(code)->bidi_class;
1903f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
1913f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1923f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodint ucdn_get_mirrored(uint32_t code)
1933f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
1943f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return get_ucd_record(code)->mirrored;
1953f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
1963f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
1973f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodint ucdn_get_script(uint32_t code)
1983f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
1993f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return get_ucd_record(code)->script;
2003f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
2013f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
2023f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahboduint32_t ucdn_mirror(uint32_t code)
2033f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
2043f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    MirrorPair mp = {0};
2053f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    MirrorPair *res;
2063f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
2073f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if (get_ucd_record(code)->mirrored == 0)
2083f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return code;
2093f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
2103f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    mp.from = code;
2113f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    res = bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN, sizeof(MirrorPair),
2123f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod            compare_mp);
2133f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
2143f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if (res == NULL)
2153f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return code;
2163f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    else
2173f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return res->to;
2183f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
2193f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
/*
 * Decompose `code` into a pair (*a, *b).
 *
 * Hangul syllables are handled algorithmically first. Otherwise the
 * decomposition record is consulted: the high byte of its first unit is
 * the length and the low byte must be zero for the record to be used
 * (presumably a non-zero low byte marks a compatibility mapping -- confirm
 * against the table generator).
 *
 * Returns 1 on success, with *b set to 0 when the decomposition has a
 * single element. Returns 0 when there is no usable decomposition; *a and
 * *b are then left untouched.
 */
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b)
{
    const unsigned short *rec;
    int tag, len;

    if (hangul_pair_decompose(code, a, b) != 0)
        return 1;

    rec = get_decomp_record(code);
    tag = rec[0] & 0xff;
    len = rec[0] >> 8;

    if (tag != 0 || len == 0)
        return 0;

    /* Skip the header unit; the UTF-16 encoded payload follows. */
    rec++;
    *a = decode_utf16(&rec);
    *b = (len > 1) ? decode_utf16(&rec) : 0;

    return 1;
}
2433f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
2443f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbodint ucdn_compose(uint32_t *code, uint32_t a, uint32_t b)
2453f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod{
2463f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    int l, r, index, indexi, offset;
2473f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
2483f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if (hangul_pair_compose(code, a, b))
2493f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return 1;
2503f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
2513f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    l = get_comp_index(a, nfc_first);
2523f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    r = get_comp_index(b, nfc_last);
2533f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
2543f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    if (l < 0 || r < 0)
2553f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod        return 0;
2563f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
2573f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    indexi = l * TOTAL_LAST + r;
2583f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    index  = comp_index0[indexi >> (COMP_SHIFT1+COMP_SHIFT2)] << COMP_SHIFT1;
2593f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    offset = (indexi >> COMP_SHIFT2) & ((1<<COMP_SHIFT1) - 1);
2603f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    index  = comp_index1[index + offset] << COMP_SHIFT2;
2613f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    offset = indexi & ((1<<COMP_SHIFT2) - 1);
2623f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    *code  = comp_data[index + offset];
2633f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
2643f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod    return *code != 0;
2653f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod}
2663f33f0d1f2603f22e86adffe3c3836136ba5e5fdBehdad Esfahbod
/*
 * Write the full decomposition stored for `code` into `decomposed`.
 *
 * The length is taken from the high byte of the record's first unit; the
 * low byte is ignored here, so every stored mapping is returned. The
 * caller must provide room for that many entries.
 *
 * Returns the number of code points written, or 0 when the record is
 * empty (nothing is written in that case).
 */
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed)
{
    const unsigned short *rec = get_decomp_record(code);
    int len = rec[0] >> 8;
    int written = 0;

    if (len == 0)
        return 0;

    /* Skip the header unit; the UTF-16 encoded payload follows. */
    rec++;
    while (written < len) {
        decomposed[written] = decode_utf16(&rec);
        written++;
    }

    return len;
}
282