#!/usr/bin/env python

import collections
import copy
import glob
from os import path
import sys
from xml.etree import ElementTree

from fontTools import ttLib

# U+FE0F, the variation selector paired with a base character in emoji
# variation sequences.
EMOJI_VS = 0xFE0F

# Maps a lower-case language code to the four-letter code of the script the
# language is written in.
LANG_TO_SCRIPT = {
    'as': 'Beng',
    'bg': 'Cyrl',
    'bn': 'Beng',
    'cu': 'Cyrl',
    'cy': 'Latn',
    'da': 'Latn',
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'et': 'Latn',
    'eu': 'Latn',
    'fr': 'Latn',
    'ga': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn',
    'hu': 'Latn',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn',
    'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn',
    'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
}

def lang_to_script(lang_code):
    """Return the four-letter script code for the language tag `lang_code`.

    The tag is lower-cased and looked up in LANG_TO_SCRIPT. While there is no
    match, the last hyphen-separated subtag is inspected: a four-letter
    alphabetic subtag is taken to be the script itself and returned
    title-cased; anything else is dropped and the shorter tag is retried.

    Fails an assertion when the tag has no hyphen left and is still unknown.
    """
    lang = lang_code.lower()
    while lang not in LANG_TO_SCRIPT:
        hyphen_idx = lang.rfind('-')
        assert hyphen_idx != -1, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)
        assumed_script = lang[hyphen_idx+1:]
        if len(assumed_script) == 4 and assumed_script.isalpha():
            # This is actually the script
            return assumed_script.title()
        lang = lang[:hyphen_idx]
    return LANG_TO_SCRIPT[lang]


def printable(inp):
    """Return a human-readable string for a code point, sequence, or set.

    A set (or frozenset) is rendered as '{a, b}', a tuple as '<a, b>', and
    anything else is formatted as a single code point, e.g. 'U+1F600'.
    Members of sets and tuples are rendered recursively.
    """
    # isinstance rather than an exact type() match, so that frozenset and
    # subclasses are rendered instead of failing in the '%04X' formatting.
    if isinstance(inp, (set, frozenset)):  # set of character sequences
        return '{' + ', '.join(printable(seq) for seq in inp) + '}'
    if isinstance(inp, tuple):  # character sequence
        return '<' + ', '.join(printable(ch) for ch in inp) + '>'
    return 'U+%04X' % inp  # single character


def open_font(font):
    """Open `font`, a (file name, index) pair, as a ttLib.TTFont.

    The file name is resolved relative to the module-level `_fonts_dir`
    (set outside this part of the file). When the index is not None it is
    passed to TTFont as `fontNumber`.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is not None:
        return ttLib.TTFont(font_path, fontNumber=index)
    else:
        return ttLib.TTFont(font_path)


def get_best_cmap(font):
    """Return the code point -> glyph name dict of the best cmap in `font`.

    The format 12 subtable (platform 3, encoding 10) is preferred; the
    format 4 subtable (platform 3, encoding 1) is used otherwise.

    Fails an assertion when either kind of subtable appears more than once,
    or when the font has neither of them.
    """
    ttfont = open_font(font)
    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    if all_unicode_cmap:
        return all_unicode_cmap.cmap
    # Report the offending font instead of failing with an AttributeError on
    # None when no usable subtable was found.
    assert bmp_cmap is not None, (
        'No format 12 or format 4 Unicode cmap in %s' % (font, ))
    return bmp_cmap.cmap
def get_variation_sequences_cmap(font):
    """Return the variation-sequence cmap subtable of `font`.

    That is the subtable with format 14, platform 0, encoding 5. Returns
    None when the font has no such subtable and fails an assertion when it
    has more than one.
    """
    match = None
    for table in open_font(font)['cmap'].tables:
        if (table.format, table.platformID, table.platEncID) != (14, 0, 5):
            continue
        assert match is None, 'More than one VS cmap in %s' % (font, )
        match = table
    return match


def get_emoji_map(font):
    """Return a dict from code points and sequences to glyph names.

    Keys are single code points (taken from the best cmap), (base, selector)
    tuples (taken from the variation-sequence cmap) and longer tuples of code
    points (taken from the type 4 lookups of the GSUB table).
    """
    # Plain characters: start from a copy of the regular cmap.
    mapping = copy.copy(get_best_cmap(font))
    glyph_to_char = {}
    for code, glyph_name in mapping.items():
        glyph_to_char[glyph_name] = code

    # Variation sequences: an entry without a glyph of its own reuses the
    # glyph of its base character.
    uvs = get_variation_sequences_cmap(font).uvsDict
    for selector, entries in uvs.items():
        for base, glyph_name in entries:
            if glyph_name is not None:
                mapping[(base, selector)] = glyph_name
            else:
                mapping[(base, selector)] = mapping[base]

    # GSUB rules. Lookups of any type other than 4 are used in the emoji
    # font for fallback and are ignored for now.
    for lookup in open_font(font)['GSUB'].table.LookupList.Lookup:
        if lookup.LookupType != 4:
            continue
        for subtable in lookup.SubTable:
            for lead_glyph, ligature_list in subtable.ligatures.items():
                for lig in ligature_list:
                    glyph_names = [lead_glyph] + lig.Component
                    chars = tuple(
                        [glyph_to_char[name] for name in glyph_names])
                    # No leading part of the sequence (of length two or
                    # more, including the sequence itself) may have been
                    # mapped already.
                    for end in range(2, len(chars) + 1):
                        assert chars[:end] not in mapping
                    mapping[chars] = lig.LigGlyph

    return mapping


def assert_font_supports_any_of_chars(font, chars):
    """Exit the program unless `font` maps at least one of `chars`."""
    cmap = get_best_cmap(font)
    if any(char in cmap for char in chars):
        return
    sys.exit('None of characters in %s were found in %s' % (chars, font))


def assert_font_supports_all_of_chars(font, chars):
    """Assert that every character of `chars` is in the best cmap of `font`."""
    cmap = get_best_cmap(font)
    for codepoint in chars:
        assert codepoint in cmap, (
            'U+%04X was not found in %s' % (codepoint, font))
def assert_font_supports_none_of_chars(font, chars):
    """Assert that no character of `chars` is in the best cmap of `font`."""
    cmap = get_best_cmap(font)
    for codepoint in chars:
        assert codepoint not in cmap, (
            'U+%04X was found in %s' % (codepoint, font))


def assert_font_supports_all_sequences(font, sequences):
    """Assert that `font` lists every (base, selector) pair of `sequences`.

    A pair counts as present only when the variation-sequence cmap has an
    entry for the selector that contains (base, None).
    """
    uvs = get_variation_sequences_cmap(font).uvsDict
    for base, selector in sorted(sequences):
        present = selector in uvs and (base, None) in uvs[selector]
        assert present, (
            '<U+%04X, U+%04X> was not found in %s' % (base, selector, font))


def check_hyphens(hyphens_dir):
    """Check that fonts for hyphenated scripts contain a hyphen character.

    Every 'hyph-<lang>.hyb' file in `hyphens_dir` names a language; each font
    registered in `_script_to_font_map` for the script of such a language
    must support U+002D or U+2010.
    """
    # Collect the scripts that need automatic hyphenation.
    scripts = set()
    for hyb_path in glob.iglob(path.join(hyphens_dir, '*.hyb')):
        file_name = path.basename(hyb_path)
        assert file_name.startswith('hyph-'), (
            'Unknown hyphenation file %s' % file_name)
        # The language code sits between the first '-' and the first '.'.
        code_start = file_name.index('-') + 1
        code_end = file_name.index('.')
        scripts.add(lang_to_script(file_name[code_start:code_end]))

    hyphen_chars = {0x002D, 0x2010}
    for script in scripts:
        script_fonts = _script_to_font_map[script]
        assert script_fonts, 'No fonts found for the "%s" script' % script
        for font in script_fonts:
            assert_font_supports_any_of_chars(font, hyphen_chars)


class FontRecord(object):
    """One <font> entry of the fonts XML file, in fallback-chain order."""

    def __init__(self, name, scripts, variant, weight, style, font):
        # name, variant: attributes of the enclosing <family> (may be None)
        # scripts: frozenset of script codes derived from the family's langs
        # weight, style: attributes of the <font> element itself
        # font: (file name, index) pair, as accepted by open_font()
        (self.name, self.scripts, self.variant,
         self.weight, self.style, self.font) = (
            name, scripts, variant, weight, style, font)


def parse_fonts_xml(fonts_xml_path):
    """Parse the fonts XML file into the module-level font tables.

    Rebuilds two globals from scratch:
      _fallback_chain: list of FontRecord, one per <font> element, in
          document order.
      _script_to_font_map: script code -> set of (file name, index) pairs.

    Fails an assertion on any structural problem (too many families,
    unexpected attributes or tags, bad weight or style).
    """
    global _script_to_font_map, _fallback_chain
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chain = []
    families = ElementTree.parse(fonts_xml_path).findall('family')
    # Minikin supports up to 254 but users can place their own font at the
    # first place. Thus, 253 is the maximum allowed number of font families
    # in the default collection.
    assert len(families) < 254, (
        'System font collection can contains up to 253 font families.')
    for family in families:
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        if not name:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)
        else:
            assert variant is None, (
                'No variant expected for LGC font %s.' % name)
            assert langs is None, (
                'No language expected for LGC fonts %s.' % name)

        scripts = set()
        if langs:
            scripts = {lang_to_script(lang) for lang in langs.split()}

        for font_elem in family:
            assert font_elem.tag == 'font', (
                'Unknown tag <%s>' % font_elem.tag)
            font_file = font_elem.text.rstrip()
            weight = int(font_elem.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = font_elem.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            # A missing (or empty) index attribute is kept as is.
            raw_index = font_elem.get('index')
            index = int(raw_index) if raw_index else raw_index
            font = (font_file, index)

            record = FontRecord(
                name, frozenset(scripts), variant, weight, style, font)
            _fallback_chain.append(record)

            # non-empty names are used for default LGC fonts
            map_scripts = {'Latn', 'Grek', 'Cyrl'} if name else scripts
            for script in map_scripts:
                _script_to_font_map[script].add(font)
def check_emoji_coverage(all_emoji, equivalent_emoji):
    """Run the emoji coverage checks against the single emoji font."""
    emoji_font = get_emoji_font()
    check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji)


def get_emoji_font():
    """Return the font of the only fallback-chain record tagged 'Zsye'.

    Fails an assertion unless exactly one record in `_fallback_chain` has
    'Zsye' among its scripts.
    """
    emoji_fonts = [
        record.font for record in _fallback_chain
        if 'Zsye' in record.scripts]
    assert len(emoji_fonts) == 1, 'There are %d emoji fonts.' % len(emoji_fonts)
    return emoji_fonts[0]


def check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji):
    """Assert that `emoji_font` covers exactly the expected emoji.

    Args:
        emoji_font: (file name, index) pair, as accepted by get_emoji_map().
        all_emoji: container of the code points and sequences the font is
            expected to support.
        equivalent_emoji: dict mapping a sequence to the sequence it is
            expected to share a glyph with.

    The checks, in order: everything in `all_emoji` is supported; nothing
    else is supported, apart from U+0000, U+000D and U+0020; each pair in
    `equivalent_emoji` maps to one glyph; and sequences that share a glyph
    all resolve, through `equivalent_emoji`, to a single sequence.
    """
    coverage = get_emoji_map(emoji_font)
    for sequence in all_emoji:
        assert sequence in coverage, (
            '%s is not supported in the emoji font.' % printable(sequence))

    for sequence in coverage:
        if sequence in {0x0000, 0x000D, 0x0020}:
            # The font needs to support a few extra characters, which is OK
            continue
        assert sequence in all_emoji, (
            'Emoji font should not support %s.' % printable(sequence))

    for first, second in sorted(equivalent_emoji.items()):
        assert coverage[first] == coverage[second], (
            '%s and %s should map to the same glyph.' % (
                printable(first),
                printable(second)))

    # Group the sequences by glyph in one pass, rather than rescanning the
    # whole coverage map once per distinct glyph.
    sequences_by_glyph = collections.defaultdict(list)
    for seq, glyph in coverage.items():
        sequences_by_glyph[glyph].append(seq)

    for glyph, maps_to_glyph in sequences_by_glyph.items():
        if len(maps_to_glyph) < 2:
            continue
        # There are more than one sequences mapping to the same glyph. We
        # need to make sure they were expected to be equivalent.
        equivalent_seqs = set()
        for seq in maps_to_glyph:
            equivalent_seq = seq
            # Follow the chain of equivalences to its final sequence.
            while equivalent_seq in equivalent_emoji:
                equivalent_seq = equivalent_emoji[equivalent_seq]
            equivalent_seqs.add(equivalent_seq)
        assert len(equivalent_seqs) == 1, (
            'The sequences %s should not result in the same glyph %s' % (
                printable(equivalent_seqs),
                glyph))


def check_emoji_defaults(default_emoji):
    """Check the fallback chain against the default-emoji character set.

    Args:
        default_emoji: set of characters; each checked font must support
            none of them.

    Every record in `_fallback_chain` is checked except the emoji font
    itself and, once the emoji font has been passed, records that have no
    scripts or that list 'Zsym'. In addition, every character of
    `_emoji_properties['Emoji']` that is not in `default_emoji` must appear
    in some checked font placed before the emoji font, unless it is in
    `_chars_by_age['7.0']`.
    """
    missing_text_chars = _emoji_properties['Emoji'] - default_emoji
    emoji_font_seen = False
    for record in _fallback_chain:
        if 'Zsye' in record.scripts:
            emoji_font_seen = True
            # No need to check the emoji font
            continue
        # For later fonts, we only check them if they have a script
        # defined, since the defined script may get them to a higher
        # score even if they appear after the emoji font. However,
        # we should skip checking the text symbols font, since
        # symbol fonts should be able to override the emoji display
        # style when 'Zsym' is explicitly specified by the user.
        if emoji_font_seen and (not record.scripts or 'Zsym' in record.scripts):
            continue

        # Check default emoji-style characters
        assert_font_supports_none_of_chars(record.font, sorted(default_emoji))

        # Mark default text-style characters appearing in fonts above the emoji
        # font as seen
        if not emoji_font_seen:
            missing_text_chars -= set(get_best_cmap(record.font))

    # Noto does not have monochrome glyphs for Unicode 7.0 wingdings and
    # webdings yet.
    missing_text_chars -= _chars_by_age['7.0']
    assert missing_text_chars == set(), (
        'Text style version of some emoji characters are missing: ' +
            repr(missing_text_chars))


# Setting reverse to true returns a dictionary that maps the values to sets of
# characters, useful for some binary properties. Otherwise, we get a
# dictionary that maps characters to the property values, assuming there's only
# one property in the file.
def parse_unicode_datafile(file_path, reverse=False):
    """Parse a semicolon-separated Unicode data file.

    Everything from '#' to the end of a line is ignored, as are blank lines.
    The first field of each remaining line is a single code point ('0041'),
    an inclusive range ('0041..005A') or a space-separated sequence
    ('0031 FE0F 20E3'); the second field is the property value. Any further
    fields are ignored.

    Args:
        file_path: path of the data file.
        reverse: when true, return a dict from each property value to the
            set of characters and sequences that have it.

    Returns:
        With reverse false, a dict from each character (int) or sequence
        (tuple of ints) to its property value; an assertion fails if the
        same key is listed twice. With reverse true, a defaultdict(set) as
        described above.
    """
    if reverse:
        output_dict = collections.defaultdict(set)
    else:
        output_dict = {}
    with open(file_path) as datafile:
        for line in datafile:
            if '#' in line:
                line = line[:line.index('#')]
            line = line.strip()
            if not line:
                continue

            chars, prop = line.split(';')[:2]
            chars = chars.strip()
            prop = prop.strip()

            if ' ' in chars:  # character sequence
                # split() without an argument tolerates repeated spaces
                # between the code points.
                sequence = [int(ch, 16) for ch in chars.split()]
                additions = [tuple(sequence)]
            elif '..' in chars:  # character range
                char_start, char_end = chars.split('..')
                char_start = int(char_start, 16)
                char_end = int(char_end, 16)
                # range() rather than xrange(), which Python 3 does not have.
                additions = range(char_start, char_end+1)
            else:  # single character
                additions = [int(chars, 16)]
            if reverse:
                output_dict[prop].update(additions)
            else:
                for addition in additions:
                    assert addition not in output_dict
                    output_dict[addition] = prop
    return output_dict


def parse_emoji_variants(file_path):
    emoji_set = set()
    text_set = set()
    with open(file_path) as datafile:
        for line in datafile:
            if '#' in line:
                line = line[:line.index('#')]
            line = line.strip()
3985dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader if not line: 3995dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader continue 4005dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader sequence, description, _ = line.split(';') 4015dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader sequence = sequence.strip().split(' ') 4025dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader base = int(sequence[0], 16) 4035dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader vs = int(sequence[1], 16) 4045dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader description = description.strip() 4055dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader if description == 'text style': 4065dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader text_set.add((base, vs)) 4075dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader elif description == 'emoji style': 4085dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader emoji_set.add((base, vs)) 4095dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader return text_set, emoji_set 4105dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4115dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4127b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournaderdef parse_ucd(ucd_path): 4137b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader global _emoji_properties, _chars_by_age 4145dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader global _text_variation_sequences, _emoji_variation_sequences 4155dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader global _emoji_sequences, _emoji_zwj_sequences 4167b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader _emoji_properties = parse_unicode_datafile( 4177b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader path.join(ucd_path, 'emoji-data.txt'), reverse=True) 418f7a68c10c70c0537bb1ea3a6a2b54b8800102859Roozbeh Pournader emoji_properties_additions = parse_unicode_datafile( 419f7a68c10c70c0537bb1ea3a6a2b54b8800102859Roozbeh Pournader 
path.join(ucd_path, 'additions', 'emoji-data.txt'), reverse=True) 420f7a68c10c70c0537bb1ea3a6a2b54b8800102859Roozbeh Pournader for prop in emoji_properties_additions.keys(): 421f7a68c10c70c0537bb1ea3a6a2b54b8800102859Roozbeh Pournader _emoji_properties[prop].update(emoji_properties_additions[prop]) 422f7a68c10c70c0537bb1ea3a6a2b54b8800102859Roozbeh Pournader 4237b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader _chars_by_age = parse_unicode_datafile( 4247b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader path.join(ucd_path, 'DerivedAge.txt'), reverse=True) 425aa3ee8e079e470c72894f0833ac4eb518143e4ddRoozbeh Pournader sequences = parse_emoji_variants( 426aa3ee8e079e470c72894f0833ac4eb518143e4ddRoozbeh Pournader path.join(ucd_path, 'emoji-variation-sequences.txt')) 4275dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader _text_variation_sequences, _emoji_variation_sequences = sequences 4285dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader _emoji_sequences = parse_unicode_datafile( 4295dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader path.join(ucd_path, 'emoji-sequences.txt')) 4306e06ad055b35b197b3083728c6c5d311fb12e57aSiyamed Sinir _emoji_sequences.update(parse_unicode_datafile( 4316e06ad055b35b197b3083728c6c5d311fb12e57aSiyamed Sinir path.join(ucd_path, 'additions', 'emoji-sequences.txt'))) 4325dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader _emoji_zwj_sequences = parse_unicode_datafile( 4335dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader path.join(ucd_path, 'emoji-zwj-sequences.txt')) 4341800ba4ffe76de2652151e43efb2a054d105c7adRoozbeh Pournader _emoji_zwj_sequences.update(parse_unicode_datafile( 4351800ba4ffe76de2652151e43efb2a054d105c7adRoozbeh Pournader path.join(ucd_path, 'additions', 'emoji-zwj-sequences.txt'))) 4365dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4375dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4385dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournaderdef 
flag_sequence(territory_code): 4395dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader return tuple(0x1F1E6 + ord(ch) - ord('A') for ch in territory_code) 4405dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4415dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4425dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh PournaderUNSUPPORTED_FLAGS = frozenset({ 4435dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('BL'), flag_sequence('BQ'), flag_sequence('DG'), 4445dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('EA'), flag_sequence('EH'), flag_sequence('FK'), 4455dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('GF'), flag_sequence('GP'), flag_sequence('GS'), 4465dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('MF'), flag_sequence('MQ'), flag_sequence('NC'), 4475dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('PM'), flag_sequence('RE'), flag_sequence('TF'), 448aa3ee8e079e470c72894f0833ac4eb518143e4ddRoozbeh Pournader flag_sequence('WF'), flag_sequence('XK'), flag_sequence('YT'), 4495dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader}) 4505dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4515dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh PournaderEQUIVALENT_FLAGS = { 4525dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('BV'): flag_sequence('NO'), 4535dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('CP'): flag_sequence('FR'), 4545dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('HM'): flag_sequence('AU'), 4555dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('SJ'): flag_sequence('NO'), 4565dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('UM'): flag_sequence('US'), 4575dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader} 4585dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 
# Code point that follows a key character ('#', '0'-'9') in the keycap
# sequences listed below.
COMBINING_KEYCAP = 0x20E3

# Maps single code points in the U+FE4E5..U+FE837 range to the flag or keycap
# sequence each one is treated as equivalent to.
LEGACY_ANDROID_EMOJI = {
    0xFE4E5: flag_sequence('JP'),
    0xFE4E6: flag_sequence('US'),
    0xFE4E7: flag_sequence('FR'),
    0xFE4E8: flag_sequence('DE'),
    0xFE4E9: flag_sequence('IT'),
    0xFE4EA: flag_sequence('GB'),
    0xFE4EB: flag_sequence('ES'),
    0xFE4EC: flag_sequence('RU'),
    0xFE4ED: flag_sequence('CN'),
    0xFE4EE: flag_sequence('KR'),
    0xFE82C: (ord('#'), COMBINING_KEYCAP),
    0xFE82E: (ord('1'), COMBINING_KEYCAP),
    0xFE82F: (ord('2'), COMBINING_KEYCAP),
    0xFE830: (ord('3'), COMBINING_KEYCAP),
    0xFE831: (ord('4'), COMBINING_KEYCAP),
    0xFE832: (ord('5'), COMBINING_KEYCAP),
    0xFE833: (ord('6'), COMBINING_KEYCAP),
    0xFE834: (ord('7'), COMBINING_KEYCAP),
    0xFE835: (ord('8'), COMBINING_KEYCAP),
    0xFE836: (ord('9'), COMBINING_KEYCAP),
    0xFE837: (ord('0'), COMBINING_KEYCAP),
}

# ZWJ sequences (keys) recorded as equivalent to a single code point (values).
ZWJ_IDENTICALS = {
    # KISS
    (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468): 0x1F48F,
    # COUPLE WITH HEART
    (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F468): 0x1F491,
    # FAMILY
    (0x1F468, 0x200D, 0x1F469, 0x200D, 0x1F466): 0x1F46A,
}

ZWJ = 0x200D
FEMALE_SIGN = 0x2640
MALE_SIGN = 0x2642

# (character, gender sign) pairs: the sequence <character, ZWJ, gender sign>
# is recorded as equivalent to the bare character.
GENDER_DEFAULTS = [
    (0x26F9, MALE_SIGN),  # PERSON WITH BALL
    (0x1F3C3, MALE_SIGN),  # RUNNER
    (0x1F3C4, MALE_SIGN),  # SURFER
    (0x1F3CA, MALE_SIGN),  # SWIMMER
    (0x1F3CB, MALE_SIGN),  # WEIGHT LIFTER
    (0x1F3CC, MALE_SIGN),  # GOLFER
    (0x1F46E, MALE_SIGN),  # POLICE OFFICER
    (0x1F46F, FEMALE_SIGN),  # WOMAN WITH BUNNY EARS
    (0x1F471, MALE_SIGN),  # PERSON WITH BLOND HAIR
    (0x1F473, MALE_SIGN),  # MAN WITH TURBAN
    (0x1F477, MALE_SIGN),  # CONSTRUCTION WORKER
    (0x1F481, FEMALE_SIGN),  # INFORMATION DESK PERSON
    (0x1F482, MALE_SIGN),  # GUARDSMAN
    (0x1F486, FEMALE_SIGN),  # FACE MASSAGE
    (0x1F487, FEMALE_SIGN),  # HAIRCUT
    (0x1F575, MALE_SIGN),  # SLEUTH OR SPY
    (0x1F645, FEMALE_SIGN),  # FACE WITH NO GOOD GESTURE
    (0x1F646, FEMALE_SIGN),  # FACE WITH OK GESTURE
    (0x1F647, MALE_SIGN),  # PERSON BOWING DEEPLY
    (0x1F64B, FEMALE_SIGN),  # HAPPY PERSON RAISING ONE HAND
    (0x1F64D, FEMALE_SIGN),  # PERSON FROWNING
    (0x1F64E, FEMALE_SIGN),  # PERSON WITH POUTING FACE
    (0x1F6A3, MALE_SIGN),  # ROWBOAT
    (0x1F6B4, MALE_SIGN),  # BICYCLIST
    (0x1F6B5, MALE_SIGN),  # MOUNTAIN BICYCLIST
    (0x1F6B6, MALE_SIGN),  # PEDESTRIAN
    (0x1F926, FEMALE_SIGN),  # FACE PALM
    (0x1F937, FEMALE_SIGN),  # SHRUG
    (0x1F938, MALE_SIGN),  # PERSON DOING CARTWHEEL
    (0x1F939, MALE_SIGN),  # JUGGLING
    (0x1F93C, MALE_SIGN),  # WRESTLERS
    (0x1F93D, MALE_SIGN),  # WATER POLO
    (0x1F93E, MALE_SIGN),  # HANDBALL
    (0x1F9D6, FEMALE_SIGN),  # PERSON IN STEAMY ROOM
    (0x1F9D7, FEMALE_SIGN),  # PERSON CLIMBING
    (0x1F9D8, FEMALE_SIGN),  # PERSON IN LOTUS POSITION
    (0x1F9D9, FEMALE_SIGN),  # MAGE
    (0x1F9DA, FEMALE_SIGN),  # FAIRY
    (0x1F9DB, FEMALE_SIGN),  # VAMPIRE
    (0x1F9DC, FEMALE_SIGN),  # MERPERSON
    (0x1F9DD, FEMALE_SIGN),  # ELF
    (0x1F9DE, FEMALE_SIGN),  # GENIE
    (0x1F9DF, FEMALE_SIGN),  # ZOMBIE
]

def is_fitzpatrick_modifier(cp):
    """Return True if cp is in U+1F3FB..U+1F3FF (the skin tone modifiers)."""
    return 0x1F3FB <= cp <= 0x1F3FF


def reverse_emoji(seq):
    """Return seq reversed, with skin tone modifiers kept after their base.

    Args:
        seq: iterable of code points.

    Returns:
        A tuple holding the code points in reverse order, except that every
        Fitzpatrick modifier still follows the code point it followed in
        seq.
    """
    rev = list(reversed(seq))
    # After reversal a modifier sits *before* the emoji it modifies; swap
    # each such pair back. Scanning from the end means a modifier that has
    # just been moved to index i is never examined again (the next check is
    # at index i-2), so it cannot keep bubbling toward the end of the list.
    for i in range(len(rev) - 1, 0, -1):
        if is_fitzpatrick_modifier(rev[i - 1]):
            rev[i], rev[i - 1] = rev[i - 1], rev[i]
    return tuple(rev)


def compute_expected_emoji():
    """Compute the emoji the fonts are expected to support.

    Reads the module-level tables filled in by parse_ucd(). As a side
    effect, the empty flag tag sequence (0x1F3F4, 0xE007F) is added to the
    global _emoji_sequences.

    Returns:
        A tuple (all_emoji, default_emoji, equivalent_emoji):
          all_emoji: set of code points and sequence tuples made of the
              'Emoji' property characters, all sequences, every code point
              used in a sequence, and the legacy Android code points.
          default_emoji: set made of the 'Emoji_Presentation' characters,
              all sequences, and the legacy Android code points.
          equivalent_emoji: dict mapping a code point or sequence to the
              code point or sequence it is recorded as equivalent to.
    """
    equivalent_emoji = {}
    sequence_pieces = set()
    all_sequences = set()
    all_sequences.update(_emoji_variation_sequences)

    # Add empty flag tag sequence that is supported as fallback
    _emoji_sequences[(0x1F3F4, 0xE007F)] = 'Emoji_Tag_Sequence'

    for sequence in _emoji_sequences:
        sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        if _emoji_sequences.get(sequence, None) == 'Emoji_Tag_Sequence':
            # Add reverse of all emoji tag sequences, which are added to
            # the fonts as a workaround to get the sequences work in RTL
            # text.
            # TODO: test if these are actually needed by Minikin/HarfBuzz.
            reversed_seq = reverse_emoji(sequence)
            all_sequences.add(reversed_seq)
            equivalent_emoji[reversed_seq] = sequence

    for sequence in _emoji_zwj_sequences:
        sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Add reverse of all emoji ZWJ sequences, which are added to the
        # fonts as a workaround to get the sequences work in RTL text.
        reversed_seq = reverse_emoji(sequence)
        all_sequences.add(reversed_seq)
        equivalent_emoji[reversed_seq] = sequence

    # Remove unsupported flags
    all_sequences.difference_update(UNSUPPORTED_FLAGS)

    # Add all tag characters used in flags
    sequence_pieces.update(range(0xE0030, 0xE0039 + 1))
    sequence_pieces.update(range(0xE0061, 0xE007A + 1))

    all_emoji = (
        _emoji_properties['Emoji'] |
        all_sequences |
        sequence_pieces |
        set(LEGACY_ANDROID_EMOJI.keys()))
    default_emoji = (
        _emoji_properties['Emoji_Presentation'] |
        all_sequences |
        set(LEGACY_ANDROID_EMOJI.keys()))

    equivalent_emoji.update(EQUIVALENT_FLAGS)
    equivalent_emoji.update(LEGACY_ANDROID_EMOJI)
    equivalent_emoji.update(ZWJ_IDENTICALS)

    for ch, gender in GENDER_DEFAULTS:
        equivalent_emoji[(ch, ZWJ, gender)] = ch
        for skin_tone in range(0x1F3FB, 0x1F3FF+1):
            skin_toned = (ch, skin_tone, ZWJ, gender)
            # Only record skin-toned variants that are actually expected.
            if skin_toned in all_emoji:
                equivalent_emoji[skin_toned] = (ch, skin_tone)

    for seq in _emoji_variation_sequences:
        equivalent_emoji[seq] = seq[0]

    return all_emoji, default_emoji, equivalent_emoji


def check_vertical_metrics():
    """Assert the expected vertical metrics on the main font families.

    For every record in the global _fallback_chain, fonts named
    'sans-serif' or 'sans-serif-condensed' must have head.yMax == 2163 and
    head.yMin == -555; those plus 'serif' and 'monospace' must have
    hhea.ascent == 1900 and hhea.descent == -500.

    Raises:
        AssertionError: if a font does not match the expected values.
    """
    for record in _fallback_chain:
        if record.name in ['sans-serif', 'sans-serif-condensed']:
            font = open_font(record.font)
            assert font['head'].yMax == 2163 and font['head'].yMin == -555, (
                'yMax and yMin of %s do not match expected values.' % (
                    record.font,))

        if record.name in ['sans-serif', 'sans-serif-condensed',
                           'serif', 'monospace']:
            font = open_font(record.font)
            assert (font['hhea'].ascent == 1900 and
                    font['hhea'].descent == -500), (
                        'ascent and descent of %s do not match expected '
                        'values.' % (record.font,))


def check_cjk_punctuation():
    """Check that no font ahead of the first CJK font has CJK punctuation.

    Walks the global _fallback_chain in order and passes each font, together
    with the range U+3000..U+301F, to assert_font_supports_none_of_chars().
    Stops at the first record whose scripts include Hans, Hant, Jpan or
    Kore.
    """
    cjk_scripts = {'Hans', 'Hant', 'Jpan', 'Kore'}
    cjk_punctuation = range(0x3000, 0x301F + 1)
    for record in _fallback_chain:
        if record.scripts.intersection(cjk_scripts):
            # CJK font seen. Stop checking the rest of the fonts.
            break
        assert_font_supports_none_of_chars(record.font, cjk_punctuation)


def main():
    """Run all checks.

    Command-line arguments:
        argv[1]: target output directory, containing 'fonts',
            'etc/fonts.xml' and 'usr/hyphen-data'.
        argv[2]: 'true' to also run the emoji checks.
        argv[3]: directory with the Unicode data files (read only when
            argv[2] is 'true').
    """
    global _fonts_dir
    target_out = sys.argv[1]
    _fonts_dir = path.join(target_out, 'fonts')

    fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')
    parse_fonts_xml(fonts_xml_path)

    check_vertical_metrics()

    hyphens_dir = path.join(target_out, 'usr', 'hyphen-data')
    check_hyphens(hyphens_dir)

    check_cjk_punctuation()

    check_emoji = sys.argv[2]
    if check_emoji == 'true':
        ucd_path = sys.argv[3]
        parse_ucd(ucd_path)
        all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
        check_emoji_coverage(all_emoji, equivalent_emoji)
        check_emoji_defaults(default_emoji)


if __name__ == '__main__':
    main()