hb-icu.cc revision 63c0ef4a0763e579c9c80887bbfbd2651de05067
1d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod/* 22409d5f8d7dd8b535ce5ea29e933f7db27d33793Behdad Esfahbod * Copyright © 2009 Red Hat, Inc. 32409d5f8d7dd8b535ce5ea29e933f7db27d33793Behdad Esfahbod * Copyright © 2009 Keith Stribley 42409d5f8d7dd8b535ce5ea29e933f7db27d33793Behdad Esfahbod * Copyright © 2011 Google, Inc. 5d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * 6c755cb3e3ac55156d0d2ec05adea7a650b97cc41Behdad Esfahbod * This is part of HarfBuzz, a text shaping library. 7d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * 8d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * Permission is hereby granted, without written agreement and without 9d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * license or royalty fees, to use, copy, modify, and distribute this 10d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * software and its documentation for any purpose, provided that the 11d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * above copyright notice and the following two paragraphs appear in 12d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * all copies of this software. 13d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * 14d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 15d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 16d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 17d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 18d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * DAMAGE. 19d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * 20d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 21d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 22d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 23d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 24d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 25d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * 26d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod * Red Hat Author(s): Behdad Esfahbod 272409d5f8d7dd8b535ce5ea29e933f7db27d33793Behdad Esfahbod * Google Author(s): Behdad Esfahbod 28d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod */ 29d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod 30c57d454accff66e5f2c58006e8fb40bc020b6182Behdad Esfahbod#include "hb-private.hh" 31d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod 32d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod#include "hb-icu.h" 33d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod 34fb194b8794898f51eb596fa4092c26606889d376Behdad Esfahbod#include "hb-unicode-private.hh" 35d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod 364fa67f34ecc65056ce60a572213fbdae66e0423bBehdad Esfahbod#include <unicode/uversion.h> 37d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod#include <unicode/uchar.h> 38498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod#include <unicode/unorm.h> 39498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod#include <unicode/unistr.h> 40d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod 41acdba3f90b232fc12fcb200dca2584481b339118Behdad EsfahbodHB_BEGIN_DECLS 42acdba3f90b232fc12fcb200dca2584481b339118Behdad Esfahbod 43acdba3f90b232fc12fcb200dca2584481b339118Behdad Esfahbod 44f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbodhb_script_t 45f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbodhb_icu_script_to_script (UScriptCode script) 46d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod{ 474d559cddbb3b3a5c12c5167eba69598618a9f283Behdad Esfahbod if (unlikely (script == USCRIPT_INVALID_CODE)) 484d559cddbb3b3a5c12c5167eba69598618a9f283Behdad Esfahbod return HB_SCRIPT_INVALID; 494d559cddbb3b3a5c12c5167eba69598618a9f283Behdad Esfahbod 50f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod return hb_script_from_string (uscript_getShortName (script)); 51d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod} 52d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod 53f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad EsfahbodUScriptCode 54f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbodhb_icu_script_from_script (hb_script_t script) 55d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod{ 564d559cddbb3b3a5c12c5167eba69598618a9f283Behdad Esfahbod if (unlikely (script == HB_SCRIPT_INVALID)) 574d559cddbb3b3a5c12c5167eba69598618a9f283Behdad Esfahbod return USCRIPT_INVALID_CODE; 58afab01cf7caca79cf6dfabe6827d1703be1a74f7Behdad Esfahbod 594d559cddbb3b3a5c12c5167eba69598618a9f283Behdad Esfahbod for (unsigned int i = 0; i < USCRIPT_CODE_LIMIT; i++) 604d559cddbb3b3a5c12c5167eba69598618a9f283Behdad Esfahbod if (unlikely (hb_icu_script_to_script ((UScriptCode) i) == script)) 614d559cddbb3b3a5c12c5167eba69598618a9f283Behdad Esfahbod return (UScriptCode) i; 622fd0c577e322ccbf762927bc4600b3ea31db4c80Ryan Lortie 63f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod return USCRIPT_UNKNOWN; 64f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod} 65f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 66f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 67f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbodstatic unsigned int 68fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbodhb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, 69fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t unicode, 70fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod void *user_data HB_UNUSED) 71f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 72f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod{ 73f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod return u_getCombiningClass (unicode); 74f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod} 75f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 76f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbodstatic unsigned int 77fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbodhb_icu_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED, 78fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t unicode, 79fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod void *user_data HB_UNUSED) 80f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod{ 81f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH)) 82f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod { 83f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_EA_WIDE: 84f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_EA_FULLWIDTH: 85f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod return 2; 86f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_EA_NEUTRAL: 87f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_EA_AMBIGUOUS: 88f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_EA_HALFWIDTH: 89f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_EA_NARROW: 90f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod return 1; 91f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod } 92f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod return 1; 93f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod} 94f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 95f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbodstatic hb_unicode_general_category_t 96fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbodhb_icu_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, 97fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t unicode, 98fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod void *user_data HB_UNUSED) 99f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod{ 100f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY)) 101f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod { 102f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_UNASSIGNED: return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; 103f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 104f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_UPPERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER; 105f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_LOWERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER; 106f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_TITLECASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER; 107f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_MODIFIER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER; 108f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_OTHER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER; 109f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 110f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_NON_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK; 111f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_ENCLOSING_MARK: return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK; 1125157e12a55f943b7fc5be7dce0b2ee1bcacca6ecBehdad Esfahbod case U_COMBINING_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK; 113f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 114f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_DECIMAL_DIGIT_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER; 115f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_LETTER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER; 116f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_OTHER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER; 117f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 118f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_SPACE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR; 119f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_LINE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR; 120f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_PARAGRAPH_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR; 121f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 122f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_CONTROL_CHAR: return HB_UNICODE_GENERAL_CATEGORY_CONTROL; 123f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_FORMAT_CHAR: return HB_UNICODE_GENERAL_CATEGORY_FORMAT; 124f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_PRIVATE_USE_CHAR: return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE; 125f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_SURROGATE: return HB_UNICODE_GENERAL_CATEGORY_SURROGATE; 126f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 127f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 128f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_DASH_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION; 129f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_START_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION; 130f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_END_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION; 131f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_CONNECTOR_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION; 132f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_OTHER_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION; 133f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 134f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_MATH_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL; 135f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_CURRENCY_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL; 136f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_MODIFIER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL; 137f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_OTHER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL; 138f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 139f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_INITIAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION; 140f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod case U_FINAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION; 141f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod } 142f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 143f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; 144f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod} 145f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 146f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbodstatic hb_codepoint_t 147fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbodhb_icu_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, 148fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t unicode, 149fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod void *user_data HB_UNUSED) 150f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod{ 151f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod return u_charMirror(unicode); 152f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod} 153f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 154f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbodstatic hb_script_t 155fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbodhb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, 156fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t unicode, 157fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod void *user_data HB_UNUSED) 158f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod{ 159f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod UErrorCode status = U_ZERO_ERROR; 160f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod UScriptCode scriptCode = uscript_getScript(unicode, &status); 161f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod 16203034acb8a9fdd33135bc3775a1f932da9ebdd42Behdad Esfahbod if (unlikely (status != U_ZERO_ERROR)) 16303034acb8a9fdd33135bc3775a1f932da9ebdd42Behdad Esfahbod return HB_SCRIPT_UNKNOWN; 16403034acb8a9fdd33135bc3775a1f932da9ebdd42Behdad Esfahbod 165f144a8ea840c6452c1fece2fd988b42a8ea7c5a6Behdad Esfahbod return hb_icu_script_to_script (scriptCode); 166d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod} 167d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod 168fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbodstatic hb_bool_t 169fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbodhb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, 170fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t a, 171fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t b, 172fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t *ab, 173fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod void *user_data HB_UNUSED) 174fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod{ 175498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod if (!a || !b) 176498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod return FALSE; 177498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod 178498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod UChar utf16[4], normalized[5]; 179498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod gint len; 180498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod hb_bool_t ret, err; 181498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod UErrorCode icu_err; 182498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod 183498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod len = 0; 184498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod err = FALSE; 185498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), a, err); 186498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod if (err) return FALSE; 187498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), b, err); 188498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod if (err) return FALSE; 189498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod 190498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod icu_err = U_ZERO_ERROR; 191498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod len = unorm_normalize (utf16, len, UNORM_NFC, 0, normalized, ARRAY_LENGTH (normalized), &icu_err); 192498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod if (icu_err) 193498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod return FALSE; 194498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod normalized[len] = 0; 195498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod if (u_strlen (normalized) == 1) { 196498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod U16_GET_UNSAFE (normalized, 0, *ab); 197498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod ret = TRUE; 198498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod } else { 199498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod ret = FALSE; 200498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod } 201498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod 202498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod return ret; 203fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod} 204fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod 205fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbodstatic hb_bool_t 206fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbodhb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, 207fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t ab, 208fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t *a, 209fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod hb_codepoint_t *b, 210fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod void *user_data HB_UNUSED) 211fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod{ 212498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod UChar utf16[2], normalized[20]; 213498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod gint len; 214498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod hb_bool_t ret, err; 215498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod UErrorCode icu_err; 216498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod 21763c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod /* This function is a monster! Maybe it wasn't a good idea adding a 21863c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod * pairwise decompose API... */ 21963c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod /* Watchout for the dragons. Err, watchout for macros changing len. */ 22063c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod 221498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod len = 0; 222498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod err = FALSE; 223498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err); 224498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod if (err) return FALSE; 225498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod 226498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod icu_err = U_ZERO_ERROR; 227498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod len = unorm_normalize (utf16, len, UNORM_NFD, 0, normalized, ARRAY_LENGTH (normalized), &icu_err); 228498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod if (icu_err) 229498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod return FALSE; 230498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod 231498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod normalized[len] = 0; 232498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod len = u_strlen (normalized); 233498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod 234498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod if (len == 1) { 235498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod U16_GET_UNSAFE (normalized, 0, *a); 236498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod *b = 0; 237498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod ret = *a != ab; 238498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod } else if (len == 2) { 23963c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod len =0; 24063c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod U16_NEXT_UNSAFE (normalized, len, *a); 24163c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod U16_NEXT_UNSAFE (normalized, len, *b); 24263c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod 243498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod /* Here's the ugly part: if ab decomposes to a single character and 244498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod * that character decomposes again, we have to detect that and undo 245498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod * the second part :-(. */ 246498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod UChar recomposed[20]; 247498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod icu_err = U_ZERO_ERROR; 24863c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err); 249498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod if (icu_err) 250498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod return FALSE; 25163c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod hb_codepoint_t c; 25263c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod U16_GET_UNSAFE (recomposed, 0, c); 25363c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod if (c != *a && c != ab) { 25463c0ef4a0763e579c9c80887bbfbd2651de05067Behdad Esfahbod *a = c; 255498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod *b = 0; 256498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod } 257498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod ret = TRUE; 258498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod } else { 259498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod /* If decomposed to more than two characters, take the last one, 260498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod * and recompose the rest to get the first component. */ 261498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod U16_PREV_UNSAFE (normalized, len, *b); 262498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod UChar recomposed[20]; 263498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod icu_err = U_ZERO_ERROR; 264498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err); 265498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod if (icu_err) 266498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod return FALSE; 267498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod /* We expect that recomposed has exactly one character now. */ 268498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod U16_GET_UNSAFE (recomposed, 0, *a); 269498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod ret = TRUE; 270498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod } 271498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod 272498e1a9be673bb02c00aac3f12bb4c6993a85910Behdad Esfahbod return ret; 273fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod} 274fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod 275d4bee9f813bb299b1c4aab7c33d588be2a7d354bBehdad Esfahbodextern HB_INTERNAL hb_unicode_funcs_t _hb_unicode_funcs_icu; 276d4bee9f813bb299b1c4aab7c33d588be2a7d354bBehdad Esfahbodhb_unicode_funcs_t _hb_icu_unicode_funcs = { 277fca368c4682624346a0aaee690e1ad6ed4c0b337Behdad Esfahbod HB_OBJECT_HEADER_STATIC, 278fca368c4682624346a0aaee690e1ad6ed4c0b337Behdad Esfahbod 279fb194b8794898f51eb596fa4092c26606889d376Behdad Esfahbod NULL, /* parent */ 280d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod TRUE, /* immutable */ 281c442672ec2fb83ed41f3994b3aa4f92a097664abBehdad Esfahbod { 282fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name, 283fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS 284fca0923b04aeff9369849da97d247a647611f346Behdad Esfahbod#undef HB_UNICODE_FUNC_IMPLEMENT 285c442672ec2fb83ed41f3994b3aa4f92a097664abBehdad Esfahbod } 286d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod}; 287d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod 288d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbodhb_unicode_funcs_t * 289d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbodhb_icu_get_unicode_funcs (void) 290d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod{ 291d4bee9f813bb299b1c4aab7c33d588be2a7d354bBehdad Esfahbod return &_hb_icu_unicode_funcs; 292d94647e2cd187bf4a4c8fb1c0c15c3d23c1293acBehdad Esfahbod} 293acdba3f90b232fc12fcb200dca2584481b339118Behdad Esfahbod 294acdba3f90b232fc12fcb200dca2584481b339118Behdad Esfahbod 295acdba3f90b232fc12fcb200dca2584481b339118Behdad EsfahbodHB_END_DECLS 296