fontchain_lint.py revision 2b8b819fee939c1bca6347a74b42272bc7008fd5
#!/usr/bin/env python

import collections
import copy
import glob
import itertools
from os import path
import sys
from xml.etree import ElementTree

from fontTools import ttLib

# U+FE0F VARIATION SELECTOR-16.
EMOJI_VS = 0xFE0F

# Maps a lowercase language code to the four-letter script code it is
# written in.
LANG_TO_SCRIPT = {
    'as': 'Beng',
    'bn': 'Beng',
    'cy': 'Latn',
    'da': 'Latn',
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'et': 'Latn',
    'eu': 'Latn',
    'fr': 'Latn',
    'ga': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn',
    'hu': 'Latn',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn',
    'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn',
    'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
}


def lang_to_script(lang_code):
    """Return the script code for a (possibly hyphenated) language code.

    The code is lowercased and looked up in LANG_TO_SCRIPT. While it is not
    found, the last hyphen-separated subtag is inspected: a four-letter
    alphabetic subtag is taken to be the script itself and returned in title
    case; any other subtag is dropped and the lookup is retried.

    Raises AssertionError when no subtags are left and the language is still
    unknown.
    """
    lang = lang_code.lower()
    while lang not in LANG_TO_SCRIPT:
        hyphen_idx = lang.rfind('-')
        assert hyphen_idx != -1, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)
        assumed_script = lang[hyphen_idx+1:]
        if len(assumed_script) == 4 and assumed_script.isalpha():
            # This is actually the script
            return assumed_script.title()
        lang = lang[:hyphen_idx]
    return LANG_TO_SCRIPT[lang]


def printable(inp):
    """Format a code point, a sequence, or a set of sequences for messages.

    A set (or frozenset) is rendered as '{...}', a tuple as '<...>', and
    anything else is formatted as a single code point, 'U+XXXX'.
    """
    # isinstance() rather than an exact type check, so frozensets and
    # subclasses are rendered the same way as plain sets and tuples.
    if isinstance(inp, (set, frozenset)):  # set of character sequences
        return '{' + ', '.join([printable(seq) for seq in inp]) + '}'
    if isinstance(inp, tuple):  # character sequence
        return '<' + (', '.join([printable(ch) for ch in inp])) + '>'
    return 'U+%04X' % inp  # single character


def open_font(font):
    """Open a font with fontTools.

    'font' is a (file name, index) pair; the file name is resolved against
    the module-level _fonts_dir. When the index is not None it is passed to
    TTFont as fontNumber.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is not None:
        return ttLib.TTFont(font_path, fontNumber=index)
    return ttLib.TTFont(font_path)


def get_best_cmap(font):
    """Return the character-to-glyph dictionary of the font's best cmap.

    The (format 12, platform 3, encoding 10) subtable is preferred; the
    (format 4, platform 3, encoding 1) BMP subtable is the fallback.

    Raises AssertionError if either kind appears more than once, or if the
    font has neither.
    """
    ttfont = open_font(font)
    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    if all_unicode_cmap is not None:
        return all_unicode_cmap.cmap
    # Fail with a readable message instead of an AttributeError on None.
    assert bmp_cmap is not None, (
        'No BMP or UCS-4 cmap found in %s' % (font, ))
    return bmp_cmap.cmap


def get_variation_sequences_cmap(font):
    """Return the font's (format 14, platform 0, encoding 5) cmap subtable.

    Returns None when the font has no such subtable. Raises AssertionError
    if there is more than one.
    """
    ttfont = open_font(font)
    vs_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (14, 0, 5):
            assert vs_cmap is None, 'More than one VS cmap in %s' % (font, )
            vs_cmap = cmap
    return vs_cmap


def get_emoji_map(font):
    """Return a dictionary from characters and sequences to glyph names.

    Keys are single code points (from the best cmap), (base, variation
    selector) pairs (from the variation sequences cmap) and tuples of code
    points (from the GSUB ligature rules).

    Raises AssertionError if the font has no variation sequences cmap, if a
    variation sequence refers to an unmapped base character, if a GSUB
    lookup is not of type 4, or if a prefix of a ligature sequence is
    already mapped.
    """
    # Add normal characters
    emoji_map = copy.copy(get_best_cmap(font))
    # Built before any sequence is added, so it only maps glyphs back to
    # single code points.
    reverse_cmap = {glyph: code for code, glyph in emoji_map.items()}

    # Add variation sequences
    vs_cmap = get_variation_sequences_cmap(font)
    assert vs_cmap is not None, (
        'No variation sequences cmap in %s' % (font, ))
    vs_dict = vs_cmap.uvsDict
    for vs in vs_dict:
        for base, glyph in vs_dict[vs]:
            if glyph is None:
                # No glyph of its own: reuse the glyph of the base character.
                assert base in emoji_map, (
                    'Base character U+%04X of a variation sequence is not '
                    'in the cmap' % base)
                emoji_map[(base, vs)] = emoji_map[base]
            else:
                emoji_map[(base, vs)] = glyph

    # Add GSUB rules
    ttfont = open_font(font)
    for lookup in ttfont['GSUB'].table.LookupList.Lookup:
        assert lookup.LookupType == 4, 'We only understand type 4 lookups'
        for subtable in lookup.SubTable:
            ligatures = subtable.ligatures
            for first_glyph in ligatures:
                for ligature in ligatures[first_glyph]:
                    glyph_names = [first_glyph] + ligature.Component
                    sequence = tuple(
                        reverse_cmap[glyph] for glyph in glyph_names)
                    # Make sure no starting subsequence of 'sequence' has been
                    # seen before.
                    for sub_len in range(2, len(sequence)+1):
                        subsequence = sequence[:sub_len]
                        assert subsequence not in emoji_map, (
                            'Subsequence %s of %s is already mapped' % (
                                printable(subsequence), printable(sequence)))
                    emoji_map[sequence] = ligature.LigGlyph

    return emoji_map


def assert_font_supports_any_of_chars(font, chars):
    """Exit the program unless the font maps at least one of 'chars'."""
    best_cmap = get_best_cmap(font)
    for char in chars:
        if char in best_cmap:
            return
    sys.exit('None of characters in %s were found in %s' % (chars, font))


def assert_font_supports_all_of_chars(font, chars):
    """Raise AssertionError unless the font maps every one of 'chars'."""
    best_cmap = get_best_cmap(font)
    for char in chars:
        assert char in best_cmap, (
            'U+%04X was not found in %s' % (char, font))


def assert_font_supports_none_of_chars(font, chars):
    """Raise AssertionError if the font maps any one of 'chars'."""
    best_cmap = get_best_cmap(font)
    for char in chars:
        assert char not in best_cmap, (
            'U+%04X was found in %s' % (char, font))


def assert_font_supports_all_sequences(font, sequences):
    """Raise AssertionError unless every (base, vs) pair is in the font.

    A pair counts as present when the font's variation sequences cmap lists
    (base, None) under the variation selector 'vs'.
    """
    vs_cmap = get_variation_sequences_cmap(font)
    # Fail with a readable message instead of an AttributeError on None.
    assert vs_cmap is not None, (
        'No variation sequences cmap in %s' % (font, ))
    vs_dict = vs_cmap.uvsDict
    for base, vs in sorted(sequences):
        assert vs in vs_dict and (base, None) in vs_dict[vs], (
            '<U+%04X, U+%04X> was not found in %s' % (base, vs, font))


def check_hyphens(hyphens_dir):
    """Check that fonts for hyphenated scripts contain a hyphen character.

    Each 'hyph-<lang>.hyb' file in 'hyphens_dir' names a language; every
    font registered for that language's script must map U+002D or U+2010.
    """
    # Find all the scripts that need automatic hyphenation
    scripts = set()
    for hyb_file in glob.iglob(path.join(hyphens_dir, '*.hyb')):
        hyb_file = path.basename(hyb_file)
        assert hyb_file.startswith('hyph-'), (
            'Unknown hyphenation file %s' % hyb_file)
        # The language code sits between the first '-' and the first '.'.
        lang_code = hyb_file[hyb_file.index('-')+1:hyb_file.index('.')]
        scripts.add(lang_to_script(lang_code))

    HYPHENS = {0x002D, 0x2010}
    for script in scripts:
        fonts = _script_to_font_map[script]
        assert fonts, 'No fonts found for the "%s" script' % script
        for font in fonts:
            assert_font_supports_any_of_chars(font, HYPHENS)


class FontRecord(object):
    """One <font> entry of the fonts XML, with its family's attributes."""

    def __init__(self, name, scripts, variant, weight, style, font):
        self.name = name
        self.scripts = scripts
        self.variant = variant
        self.weight = weight
        self.style = style
        self.font = font  # (file name, index) pair

    def __repr__(self):
        return 'FontRecord(%r, %r, %r, %r, %r, %r)' % (
            self.name, self.scripts, self.variant,
            self.weight, self.style, self.font)


def parse_fonts_xml(fonts_xml_path):
    """Parse the fonts XML into the module-level font tables.

    Sets two globals:
      _fallback_chain: list of FontRecord, in document order.
      _script_to_font_map: defaultdict from script code to a set of
          (file name, index) pairs.

    Raises AssertionError on unexpected attributes, tags or values.
    """
    global _script_to_font_map, _fallback_chain
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chain = []
    tree = ElementTree.parse(fonts_xml_path)
    for family in tree.findall('family'):
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        if name:
            assert variant is None, (
                'No variant expected for LGC font %s.' % name)
            assert langs is None, (
                'No language expected for LGC fonts %s.' % name)
        else:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)

        if langs:
            langs = langs.split()
            scripts = {lang_to_script(lang) for lang in langs}
        else:
            scripts = set()

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)
            assert child.text is not None, 'Empty <font> element'
            # Drop whitespace the XML layout may put around the file name.
            font_file = child.text.strip()

            weight = child.get('weight')
            assert weight is not None, (
                'Missing weight for font %s' % font_file)
            weight = int(weight)
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            # A missing or empty attribute means "no index".
            index = child.get('index')
            index = int(index) if index else None

            _fallback_chain.append(FontRecord(
                name,
                frozenset(scripts),
                variant,
                weight,
                style,
                (font_file, index)))

            if name:  # non-empty names are used for default LGC fonts
                map_scripts = {'Latn', 'Grek', 'Cyrl'}
            else:
                map_scripts = scripts
            for script in map_scripts:
                _script_to_font_map[script].add((font_file, index))


def check_emoji_coverage(all_emoji, equivalent_emoji):
    """Run the coverage checks against the single emoji font."""
    emoji_font = get_emoji_font()
    check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji)


def get_emoji_font():
    """Return the font of the only fallback record with the 'Zsye' script.

    Raises AssertionError unless exactly one such record exists.
    """
    emoji_fonts = [
        record.font for record in _fallback_chain
        if 'Zsye' in record.scripts]
    assert len(emoji_fonts) == 1, 'There are %d emoji fonts.' % len(emoji_fonts)
    return emoji_fonts[0]


def _check_emoji_coverage_map(coverage, all_emoji, equivalent_emoji):
    """Validate a sequence-to-glyph map against the expected emoji set.

    'coverage' maps characters and sequences to glyphs, 'all_emoji' is the
    collection of sequences that must be supported, and 'equivalent_emoji'
    maps a sequence to the sequence it should share a glyph with.
    """
    for sequence in all_emoji:
        assert sequence in coverage, (
            '%s is not supported in the emoji font.' % printable(sequence))

    # The font needs to support a few extra characters, which is OK
    allowed_extras = {0x0000, 0x000D, 0x0020}
    for sequence in coverage:
        if sequence in allowed_extras:
            continue
        assert sequence in all_emoji, (
            'Emoji font should not support %s.' % printable(sequence))

    for first, second in sorted(equivalent_emoji.items()):
        assert coverage[first] == coverage[second], (
            '%s and %s should map to the same glyph.' % (
                printable(first),
                printable(second)))

    # Group the sequences by glyph in one pass, instead of rescanning the
    # whole map once per glyph.
    glyph_to_seqs = collections.defaultdict(list)
    for seq, glyph in coverage.items():
        glyph_to_seqs[glyph].append(seq)

    for glyph, maps_to_glyph in glyph_to_seqs.items():
        if len(maps_to_glyph) > 1:
            # There are more than one sequences mapping to the same glyph. We
            # need to make sure they were expected to be equivalent.
            equivalent_seqs = set()
            for seq in maps_to_glyph:
                equivalent_seq = seq
                # Follow the equivalence chain to its final sequence.
                while equivalent_seq in equivalent_emoji:
                    equivalent_seq = equivalent_emoji[equivalent_seq]
                equivalent_seqs.add(equivalent_seq)
            assert len(equivalent_seqs) == 1, (
                'The sequences %s should not result in the same glyph %s' % (
                    printable(equivalent_seqs),
                    glyph))


def check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji):
    """Check that 'emoji_font' supports exactly the expected sequences.

    Raises AssertionError if an expected sequence is missing, if the font
    supports an unexpected one (other than U+0000, U+000D and U+0020), if
    sequences declared equivalent map to different glyphs, or if sequences
    not declared equivalent share a glyph.
    """
    coverage = get_emoji_map(emoji_font)
    _check_emoji_coverage_map(coverage, all_emoji, equivalent_emoji)


def check_emoji_defaults(default_emoji):
    """Check how the non-emoji fonts treat emoji characters.

    No checked font may map a character from 'default_emoji'. Every other
    character with the 'Emoji' property must be mapped by some font listed
    before the emoji font, except for characters whose age is '7.0'.
    """
    missing_text_chars = _emoji_properties['Emoji'] - default_emoji
    emoji_font_seen = False
    for record in _fallback_chain:
        if 'Zsye' in record.scripts:
            emoji_font_seen = True
            # No need to check the emoji font
            continue
        # For later fonts, we only check them if they have a script
        # defined, since the defined script may get them to a higher
        # score even if they appear after the emoji font.
        if emoji_font_seen and not record.scripts:
            continue

        # Check default emoji-style characters
        assert_font_supports_none_of_chars(record.font, sorted(default_emoji))

        # Mark default text-style characters appearing in fonts above the emoji
        # font as seen
        if not emoji_font_seen:
            missing_text_chars -= set(get_best_cmap(record.font))

    # Noto does not have monochrome glyphs for Unicode 7.0 wingdings and
    # webdings yet.
    missing_text_chars -= _chars_by_age['7.0']
    assert missing_text_chars == set(), (
        'Text style version of some emoji characters are missing: ' +
            repr(missing_text_chars))


# Setting reverse to true returns a dictionary that maps the values to sets of
# characters, useful for some binary properties. Otherwise, we get a
# dictionary that maps characters to the property values, assuming there's only
# one property in the file.
def parse_unicode_datafile(file_path, reverse=False):
    """Parse a semicolon-separated Unicode data file.

    Everything from '#' to the end of a line is ignored, and so are blank
    lines. The first field of a line is a single code point, a
    'start..end' range or a space-separated sequence of code points, all in
    hexadecimal; the second field is the property value.

    With reverse=False the result maps each code point (int) or sequence
    (tuple of ints) to its property value. With reverse=True it is a
    defaultdict mapping each property value to the set of code points and
    sequences that have it.

    Raises AssertionError when reverse is False and an entry appears twice.
    """
    if reverse:
        output_dict = collections.defaultdict(set)
    else:
        output_dict = {}
    with open(file_path) as datafile:
        for line in datafile:
            if '#' in line:
                line = line[:line.index('#')]
            line = line.strip()
            if not line:
                continue

            chars, prop = line.split(';')[:2]
            chars = chars.strip()
            prop = prop.strip()

            if ' ' in chars:  # character sequence
                # split() without an argument tolerates repeated spaces.
                sequence = [int(ch, 16) for ch in chars.split()]
                additions = [tuple(sequence)]
            elif '..' in chars:  # character range
                char_start, char_end = chars.split('..')
                char_start = int(char_start, 16)
                char_end = int(char_end, 16)
                # range() exists on both Python 2 and 3; xrange does not.
                additions = range(char_start, char_end+1)
            else:  # single character
                additions = [int(chars, 16)]
            if reverse:
                output_dict[prop].update(additions)
            else:
                for addition in additions:
                    assert addition not in output_dict, (
                        'Duplicate entry for %r in %s' % (
                            addition, file_path))
                    output_dict[addition] = prop
    return output_dict


def parse_standardized_variants(file_path):
    emoji_set = set()
    text_set = set()
    with open(file_path) as datafile:
        for line in datafile:
            if '#' in line:
                line = line[:line.index('#')]
            line = line.strip()
3855dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader if not line: 3865dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader continue 3875dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader sequence, description, _ = line.split(';') 3885dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader sequence = sequence.strip().split(' ') 3895dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader base = int(sequence[0], 16) 3905dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader vs = int(sequence[1], 16) 3915dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader description = description.strip() 3925dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader if description == 'text style': 3935dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader text_set.add((base, vs)) 3945dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader elif description == 'emoji style': 3955dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader emoji_set.add((base, vs)) 3965dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader return text_set, emoji_set 3975dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 3985dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 3997b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournaderdef parse_ucd(ucd_path): 4007b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader global _emoji_properties, _chars_by_age 4015dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader global _text_variation_sequences, _emoji_variation_sequences 4025dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader global _emoji_sequences, _emoji_zwj_sequences 4037b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader _emoji_properties = parse_unicode_datafile( 4047b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader path.join(ucd_path, 'emoji-data.txt'), reverse=True) 4057b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader _chars_by_age = parse_unicode_datafile( 4067b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader 
path.join(ucd_path, 'DerivedAge.txt'), reverse=True) 4075dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader sequences = parse_standardized_variants( 4085dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader path.join(ucd_path, 'StandardizedVariants.txt')) 4095dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader _text_variation_sequences, _emoji_variation_sequences = sequences 4105dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader _emoji_sequences = parse_unicode_datafile( 4115dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader path.join(ucd_path, 'emoji-sequences.txt')) 4125dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader _emoji_zwj_sequences = parse_unicode_datafile( 4135dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader path.join(ucd_path, 'emoji-zwj-sequences.txt')) 4145dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4155dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4165dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournaderdef flag_sequence(territory_code): 4175dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader return tuple(0x1F1E6 + ord(ch) - ord('A') for ch in territory_code) 4185dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4195dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4205dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh PournaderUNSUPPORTED_FLAGS = frozenset({ 4215dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('BL'), flag_sequence('BQ'), flag_sequence('DG'), 4225dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('EA'), flag_sequence('EH'), flag_sequence('FK'), 4235dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('GF'), flag_sequence('GP'), flag_sequence('GS'), 4245dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('MF'), flag_sequence('MQ'), flag_sequence('NC'), 4255dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('PM'), flag_sequence('RE'), flag_sequence('TF'), 
4263b3c78e6ba90c58bc8a4cd4409cfc5bc854ddc3bRoozbeh Pournader flag_sequence('UN'), flag_sequence('WF'), flag_sequence('XK'), 4273b3c78e6ba90c58bc8a4cd4409cfc5bc854ddc3bRoozbeh Pournader flag_sequence('YT'), 4285dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader}) 4295dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4305dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh PournaderEQUIVALENT_FLAGS = { 4315dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('BV'): flag_sequence('NO'), 4325dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('CP'): flag_sequence('FR'), 4335dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('HM'): flag_sequence('AU'), 4345dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('SJ'): flag_sequence('NO'), 4355dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader flag_sequence('UM'): flag_sequence('US'), 4365dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader} 4375dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4385dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh PournaderCOMBINING_KEYCAP = 0x20E3 4395dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 44010ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader# Characters that Android defaults to emoji style, different from the recommendations in UTR #51 44110ea8f7431f3748be4b788100f1cab5f703284acRoozbeh PournaderANDROID_DEFAULT_EMOJI = frozenset({ 44210ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x2600, # BLACK SUN WITH RAYS 44310ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x2601, # CLOUD 44410ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x260E, # BLACK TELEPHONE 44510ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x261D, # WHITE UP POINTING INDEX 44610ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x263A, # WHITE SMILING FACE 44710ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x2660, # BLACK SPADE SUIT 
44810ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x2663, # BLACK CLUB SUIT 44910ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x2665, # BLACK HEART SUIT 45010ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x2666, # BLACK DIAMOND SUIT 45110ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x270C, # VICTORY HAND 45210ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x2744, # SNOWFLAKE 45310ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 0x2764, # HEAVY BLACK HEART 45410ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader}) 45510ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader 4565dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh PournaderLEGACY_ANDROID_EMOJI = { 4575dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE4E5: flag_sequence('JP'), 4585dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE4E6: flag_sequence('US'), 4595dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE4E7: flag_sequence('FR'), 4605dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE4E8: flag_sequence('DE'), 4615dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE4E9: flag_sequence('IT'), 4625dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE4EA: flag_sequence('GB'), 4635dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE4EB: flag_sequence('ES'), 4645dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE4EC: flag_sequence('RU'), 4655dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE4ED: flag_sequence('CN'), 4665dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE4EE: flag_sequence('KR'), 4675dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE82C: (ord('#'), COMBINING_KEYCAP), 4685dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE82E: (ord('1'), COMBINING_KEYCAP), 4695dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE82F: (ord('2'), COMBINING_KEYCAP), 4705dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 
0xFE830: (ord('3'), COMBINING_KEYCAP), 4715dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE831: (ord('4'), COMBINING_KEYCAP), 4725dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE832: (ord('5'), COMBINING_KEYCAP), 4735dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE833: (ord('6'), COMBINING_KEYCAP), 4745dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE834: (ord('7'), COMBINING_KEYCAP), 4755dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE835: (ord('8'), COMBINING_KEYCAP), 4765dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE836: (ord('9'), COMBINING_KEYCAP), 4775dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 0xFE837: (ord('0'), COMBINING_KEYCAP), 4785dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader} 4795dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 4805dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh PournaderZWJ_IDENTICALS = { 4815dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader # KISS 4825dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468): 0x1F48F, 4835dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader # COUPLE WITH HEART 4845dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F468): 0x1F491, 4855dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader # FAMILY 4865dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader (0x1F468, 0x200D, 0x1F469, 0x200D, 0x1F466): 0x1F46A, 4875dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader} 4885dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader 489f874a1949a7516596a0c2f5829e140dc6f69c326Doug Felt 490f874a1949a7516596a0c2f5829e140dc6f69c326Doug Feltdef is_fitzpatrick_modifier(cp): 4913b3c78e6ba90c58bc8a4cd4409cfc5bc854ddc3bRoozbeh Pournader return 0x1F3FB <= cp <= 0x1F3FF 4923b3c78e6ba90c58bc8a4cd4409cfc5bc854ddc3bRoozbeh Pournader 4933b3c78e6ba90c58bc8a4cd4409cfc5bc854ddc3bRoozbeh Pournader 
def reverse_emoji(seq):
    """Return seq reversed, keeping Fitzpatrick modifiers after their emoji.

    The sequence is split into clusters, each made of one code point plus any
    Fitzpatrick modifiers that directly follow it, and the clusters are
    emitted in reverse order.  A modifier therefore stays attached to the
    emoji it followed in seq, wherever in the sequence that emoji is.
    (A swap-while-scanning approach re-examines the modifier it has just
    moved and drags it to the end of the result whenever the modified emoji
    is not the first element of seq.)

    Args:
        seq: iterable of code points (ints).

    Returns:
        A tuple of code points.
    """
    clusters = []
    for cp in seq:
        if clusters and is_fitzpatrick_modifier(cp):
            clusters[-1].append(cp)
        else:
            clusters.append([cp])
    return tuple(cp for cluster in reversed(clusters) for cp in cluster)


def compute_expected_emoji():
    """Compute the emoji the fonts are expected to support.

    Uses the module-level data filled in by parse_ucd().

    Returns:
        A tuple (all_emoji, default_emoji, equivalent_emoji) where
        all_emoji is the set of all expected code points and sequences,
        default_emoji is the subset-style set built from the
        Emoji_Presentation characters, ANDROID_DEFAULT_EMOJI, all sequences
        and the LEGACY_ANDROID_EMOJI keys, and equivalent_emoji is a dict
        mapping a code point or sequence to the one it is equivalent to.
    """
    equivalent_emoji = {}
    sequence_pieces = set()
    all_sequences = set()
    all_sequences.update(_emoji_variation_sequences)

    # add zwj sequences not in the current emoji-zwj-sequences.txt
    adjusted_emoji_zwj_sequences = dict(_emoji_zwj_sequences)
    # single parent families
    additional_emoji_zwj = (
        (0x1F468, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467),
        (0x1F468, 0x200D, 0x1F466, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467, 0x200D, 0x1F467),
        (0x1F469, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467),
        (0x1F469, 0x200D, 0x1F466, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467, 0x200D, 0x1F467),
    )
    # sequences formed from man and woman and optional fitzpatrick modifier
    modified_extensions = (
        0x2696,
        0x2708,
        0x1F3A8,
        0x1F680,
        0x1F692,
    )
    for seq in additional_emoji_zwj:
        adjusted_emoji_zwj_sequences[seq] = 'Emoji_ZWJ_Sequence'
    for ext in modified_extensions:
        for base in (0x1F468, 0x1F469):
            seq = (base, 0x200D, ext)
            adjusted_emoji_zwj_sequences[seq] = 'Emoji_ZWJ_Sequence'
            for modifier in range(0x1F3FB, 0x1F400):
                seq = (base, modifier, 0x200D, ext)
                adjusted_emoji_zwj_sequences[seq] = 'Emoji_ZWJ_Sequence'

    for sequence in _emoji_sequences:
        sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)

    for sequence in adjusted_emoji_zwj_sequences:
        sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Add reverse of all emoji ZWJ sequences, which are added to the fonts
        # as a workaround to get the sequences work in RTL text.
        reversed_seq = reverse_emoji(sequence)
        all_sequences.add(reversed_seq)
        equivalent_emoji[reversed_seq] = sequence

    # Add all two-letter flag sequences, as even the unsupported ones should
    # resolve to a flag tofu.
    all_letters = [chr(code) for code in range(ord('A'), ord('Z')+1)]
    all_two_letter_codes = itertools.product(all_letters, repeat=2)
    all_flags = {flag_sequence(code) for code in all_two_letter_codes}
    all_sequences.update(all_flags)
    tofu_flags = UNSUPPORTED_FLAGS | (all_flags - set(_emoji_sequences))

    all_emoji = (
        _emoji_properties['Emoji'] |
        all_sequences |
        sequence_pieces |
        set(LEGACY_ANDROID_EMOJI))
    default_emoji = (
        _emoji_properties['Emoji_Presentation'] |
        ANDROID_DEFAULT_EMOJI |
        all_sequences |
        set(LEGACY_ANDROID_EMOJI))

    # Every tofu flag is recorded as equivalent to the smallest one.
    first_tofu_flag = min(tofu_flags)
    for flag in tofu_flags:
        if flag != first_tofu_flag:
            equivalent_emoji[flag] = first_tofu_flag
    equivalent_emoji.update(EQUIVALENT_FLAGS)
    equivalent_emoji.update(LEGACY_ANDROID_EMOJI)
    equivalent_emoji.update(ZWJ_IDENTICALS)
    for seq in _emoji_variation_sequences:
        equivalent_emoji[seq] = seq[0]

    return all_emoji, default_emoji, equivalent_emoji


def check_vertical_metrics():
    """Assert that the default font families have the expected metrics.

    For every record in _fallback_chain named 'sans-serif' or
    'sans-serif-condensed', the 'head' table must have yMax 2163 and
    yMin -555.  For those names plus 'serif' and 'monospace', the 'hhea'
    table must have ascent 1900 and descent -500.

    Raises:
        AssertionError: if a font does not have the expected values.
    """
    for record in _fallback_chain:
        if record.name in ['sans-serif', 'sans-serif-condensed']:
            font = open_font(record.font)
            assert font['head'].yMax == 2163 and font['head'].yMin == -555, (
                'yMax and yMin of %s do not match expected values.' % (record.font,))

        if record.name in ['sans-serif', 'sans-serif-condensed', 'serif', 'monospace']:
            font = open_font(record.font)
            assert font['hhea'].ascent == 1900 and font['hhea'].descent == -500, (
                'ascent and descent of %s do not match expected values.' % (record.font,))


def main():
    """Run the lint checks.

    Command-line arguments:
        sys.argv[1]: target output directory; fonts are looked up in its
            'fonts' subdirectory, fonts.xml in 'etc', and hyphenation data
            in 'usr/hyphen-data'.
        sys.argv[2]: the emoji checks run only if this is the string 'true'.
        sys.argv[3]: directory with the Unicode data files; read only when
            the emoji checks are enabled.
    """
    global _fonts_dir
    target_out = sys.argv[1]
    _fonts_dir = path.join(target_out, 'fonts')

    fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')
    parse_fonts_xml(fonts_xml_path)

    check_vertical_metrics()

    hyphens_dir = path.join(target_out, 'usr', 'hyphen-data')
    check_hyphens(hyphens_dir)

    check_emoji = sys.argv[2]
    if check_emoji == 'true':
        ucd_path = sys.argv[3]
        parse_ucd(ucd_path)
        all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
        check_emoji_coverage(all_emoji, equivalent_emoji)
        check_emoji_defaults(default_emoji)


if __name__ == '__main__':
    main()