fontchain_lint.py revision f9936b9b7cade30306d5f17534256e587c172254
10e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader#!/usr/bin/env python
20e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
30e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderimport collections
45dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournaderimport copy
50e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderimport glob
65dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournaderimport itertools
70e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderfrom os import path
80e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderimport sys
90e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderfrom xml.etree import ElementTree
100e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
110e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderfrom fontTools import ttLib
120e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
# U+FE0F, VARIATION SELECTOR-16.
EMOJI_VS = 0xFE0F

# Script (four-letter code) assumed for a bare language code.  Tags that are
# not listed here must carry an explicit script subtag; see lang_to_script().
# Entries are kept in alphabetical order of the language code.
LANG_TO_SCRIPT = {
    'as': 'Beng', 'bn': 'Beng',
    'cy': 'Latn', 'da': 'Latn', 'de': 'Latn', 'en': 'Latn', 'es': 'Latn',
    'et': 'Latn', 'eu': 'Latn', 'fr': 'Latn', 'ga': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn', 'hu': 'Latn',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn', 'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn', 'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
}
480e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def lang_to_script(lang_code):
    """Return the four-letter script code for a language tag.

    The tag is lower-cased and looked up in LANG_TO_SCRIPT.  While there is
    no match, the last hyphen-separated subtag is examined: a subtag made of
    exactly four alphabetic characters is taken to be the script itself and
    is returned in title case; any other subtag is dropped and the shortened
    tag is looked up again.  The assertion fails once there is no hyphen left
    to split on.
    """
    remaining = lang_code.lower()
    while remaining not in LANG_TO_SCRIPT:
        prefix, hyphen, last_subtag = remaining.rpartition('-')
        assert hyphen, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)
        if len(last_subtag) == 4 and last_subtag.isalpha():
            # This is actually the script
            return last_subtag.title()
        remaining = prefix
    return LANG_TO_SCRIPT[remaining]
620e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
630e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def printable(inp):
    """Render a code point, a sequence, or a set of them for messages.

    Args:
        inp: an integer code point, a tuple of code points (a character
            sequence), or a set/frozenset whose members are either of those.

    Returns:
        'U+XXXX' for a single code point, '<U+XXXX, U+YYYY>' for a sequence
        and '{...}' for a set.  Set members are rendered first and then
        sorted as strings, so the output does not depend on set iteration
        order.
    """
    # isinstance (rather than an exact type check) also accepts frozenset
    # and subclasses, which would otherwise fall through to the integer
    # formatting below and raise TypeError.
    if isinstance(inp, (set, frozenset)):  # set of character sequences
        return '{' + ', '.join(sorted(printable(seq) for seq in inp)) + '}'
    if isinstance(inp, tuple):  # character sequence
        return '<' + ', '.join(printable(ch) for ch in inp) + '>'
    return 'U+%04X' % inp  # single character
715dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
725dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
def open_font(font):
    """Open a font described by a (file name, collection index) pair.

    The file name is resolved against the module-level _fonts_dir.  The
    index is forwarded to ttLib.TTFont as fontNumber only when it is not
    None; otherwise TTFont is called with the path alone.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    extra_args = {} if index is None else {'fontNumber': index}
    return ttLib.TTFont(font_path, **extra_args)
805dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
815dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
def get_best_cmap(font):
    """Return the character map of the font's preferred Unicode cmap subtable.

    Subtables are identified by (format, platformID, platEncID).  The
    (12, 3, 10) UCS-4 subtable is preferred; the (4, 3, 1) BMP subtable is
    the fallback.

    Args:
        font: a (file name, collection index) pair as accepted by open_font().

    Returns:
        The `cmap` attribute of the chosen subtable.

    Raises:
        AssertionError: if the font has more than one subtable of either
            kind, or has neither of them.
    """
    ttfont = open_font(font)
    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    best = all_unicode_cmap if all_unicode_cmap else bmp_cmap
    # Fail with a message naming the font instead of an AttributeError on
    # None when neither subtable is present.
    assert best is not None, (
        'No BMP or UCS-4 Unicode cmap found in %s' % (font, ))
    return best.cmap
970e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
980e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def get_variation_sequences_cmap(font):
    """Return the font's variation-sequence cmap subtable, or None.

    The subtable is the one whose (format, platformID, platEncID) is
    (14, 0, 5).  Finding a second such subtable trips an assertion.
    """
    vs_specifier = (14, 0, 5)
    found = None
    for table in open_font(font)['cmap'].tables:
        if (table.format, table.platformID, table.platEncID) != vs_specifier:
            continue
        assert found is None, 'More than one VS cmap in %s' % (font, )
        found = table
    return found
1085dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
1095dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
def get_emoji_map(font):
    """Build a map from characters and character sequences to glyphs.

    Keys are single code points (from the best cmap), (base, selector)
    pairs (from the variation-sequence cmap) and longer code point tuples
    (from GSUB ligature lookups).  Values are the glyphs they resolve to.
    """
    # Plain characters: work on a copy so the cmap itself is not modified.
    emoji_map = copy.copy(get_best_cmap(font))
    # Glyph -> code point, used below to turn ligature components back into
    # code points.  It is built before any sequence keys are added.
    reverse_cmap = {glyph: code for code, glyph in emoji_map.items()}

    # Variation sequences: an entry with no glyph of its own reuses the
    # glyph of its base character.
    uvs_table = get_variation_sequences_cmap(font).uvsDict
    for selector in uvs_table:
        for base, glyph in uvs_table[selector]:
            emoji_map[(base, selector)] = (
                emoji_map[base] if glyph is None else glyph)

    # GSUB rules: every lookup is expected to be a ligature (type 4) lookup.
    ttfont = open_font(font)
    for lookup in ttfont['GSUB'].table.LookupList.Lookup:
        assert lookup.LookupType == 4, 'We only understand type 4 lookups'
        for subtable in lookup.SubTable:
            ligature_sets = subtable.ligatures
            for first_glyph in ligature_sets:
                for ligature in ligature_sets[first_glyph]:
                    glyph_names = [first_glyph] + ligature.Component
                    sequence = tuple(
                        reverse_cmap[name] for name in glyph_names)
                    # Neither the sequence itself nor any of its prefixes of
                    # length two or more may already be mapped.
                    for prefix_len in range(2, len(sequence) + 1):
                        assert sequence[:prefix_len] not in emoji_map
                    emoji_map[sequence] = ligature.LigGlyph

    return emoji_map
1435dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
1445dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
def assert_font_supports_any_of_chars(font, chars):
    """Exit the program unless the font's best cmap has one of `chars`.

    Unlike the sibling assert_* helpers this reports failure through
    sys.exit() with a message, not through an assertion.
    """
    best_cmap = get_best_cmap(font)
    if not any(char in best_cmap for char in chars):
        sys.exit('None of characters in %s were found in %s' % (chars, font))
1510e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
1520e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def assert_font_supports_all_of_chars(font, chars):
    """Assert that every character in `chars` is in the font's best cmap.

    The assertion message names the first missing character in iteration
    order.
    """
    best_cmap = get_best_cmap(font)
    missing = [char for char in chars if char not in best_cmap]
    assert not missing, (
        'U+%04X was not found in %s' % (missing[0], font))
158fa1facc0fd3d04fbc442e23dd8e09f343c8932fcRoozbeh Pournader
159fa1facc0fd3d04fbc442e23dd8e09f343c8932fcRoozbeh Pournader
def assert_font_supports_none_of_chars(font, chars):
    """Assert that no character in `chars` is in the font's best cmap.

    The assertion message names the first offending character in iteration
    order.
    """
    best_cmap = get_best_cmap(font)
    present = [char for char in chars if char in best_cmap]
    assert not present, (
        'U+%04X was found in %s' % (present[0], font))
165fa1facc0fd3d04fbc442e23dd8e09f343c8932fcRoozbeh Pournader
166fa1facc0fd3d04fbc442e23dd8e09f343c8932fcRoozbeh Pournader
def assert_font_supports_all_sequences(font, sequences):
    """Assert that the font lists every (base, selector) pair in `sequences`.

    A pair counts as supported only when the variation-sequence cmap has an
    entry (base, None) under that selector.  Pairs are checked in sorted
    order, so the first failure reported is the smallest unsupported pair.
    """
    uvs_table = get_variation_sequences_cmap(font).uvsDict
    for base, selector in sorted(sequences):
        listed = (base, None) in uvs_table.get(selector, ())
        assert listed, (
            '<U+%04X, U+%04X> was not found in %s' % (base, selector, font))
1725dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
1735dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
def check_hyphens(hyphens_dir):
    """Check that fonts for hyphenated scripts contain a hyphen character.

    Every 'hyph-<lang>.hyb' file in `hyphens_dir` contributes the script of
    its language.  Each font registered for one of those scripts in
    _script_to_font_map must then support U+002D or U+2010.
    """
    # Find all the scripts that need automatic hyphenation
    scripts = set()
    for hyb_path in glob.iglob(path.join(hyphens_dir, '*.hyb')):
        file_name = path.basename(hyb_path)
        assert file_name.startswith('hyph-'), (
            'Unknown hyphenation file %s' % file_name)
        # The language code sits between the first hyphen and the first dot.
        first_hyphen = file_name.index('-')
        first_dot = file_name.index('.')
        scripts.add(lang_to_script(file_name[first_hyphen + 1:first_dot]))

    hyphen_chars = {0x002D, 0x2010}
    for script in scripts:
        fonts = _script_to_font_map[script]
        assert fonts, 'No fonts found for the "%s" script' % script
        for font in fonts:
            assert_font_supports_any_of_chars(font, hyphen_chars)
1900e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
1910e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
class FontRecord(object):
    """Plain record holding the attributes of one font entry.

    Attributes (stored exactly as passed to the constructor):
        name: name of the family the font belongs to, or None.
        scripts: collection of script codes attached to the family.
        variant: family variant, or None.
        weight: font weight.
        style: font style.
        font: (file name, collection index) pair identifying the font.
    """

    def __init__(self, name, scripts, variant, weight, style, font):
        self.name = name
        self.scripts = scripts
        self.variant = variant
        self.weight = weight
        self.style = style
        self.font = font

    def __repr__(self):
        # Readable form for debugging output and failure messages.
        return (
            '%s(name=%r, scripts=%r, variant=%r, weight=%r, style=%r, '
            'font=%r)' % (
                type(self).__name__,
                self.name,
                self.scripts,
                self.variant,
                self.weight,
                self.style,
                self.font))
2005dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
2015dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
def parse_fonts_xml(fonts_xml_path):
    """Parse a fonts.xml file into the module-level font tables.

    Rebuilds two globals from scratch:
        _script_to_font_map: defaultdict mapping a script code to the set of
            (font file, index) pairs registered for that script.
        _fallback_chain: list of FontRecord objects, one per <font> element,
            in document order.

    Families with a name must have neither a variant nor a lang attribute
    and are registered for the Latn, Grek and Cyrl scripts.  Unnamed
    families are registered for the scripts of their lang attribute.

    Args:
        fonts_xml_path: path of the XML file to read.

    Raises:
        AssertionError: on an unexpected family attribute, a child element
            that is not <font>, an empty font file name, a weight that is
            not a multiple of 100, or an unknown style.
    """
    global _script_to_font_map, _fallback_chain
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chain = []
    tree = ElementTree.parse(fonts_xml_path)
    for family in tree.findall('family'):
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        if name:
            assert variant is None, (
                'No variant expected for LGC font %s.' % name)
            assert langs is None, (
                'No language expected for LGC fonts %s.' % name)
        else:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)

        if langs:
            langs = langs.split()
            scripts = {lang_to_script(lang) for lang in langs}
        else:
            scripts = set()

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)
            # Element text keeps any whitespace or line breaks found around
            # the file name in the XML; strip it so the name can be joined
            # onto the fonts directory as-is.
            font_file = (child.text or '').strip()
            assert font_file, 'A <font> element has no file name.'
            weight = int(child.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            # A missing or empty index attribute means "no index"; store
            # None so that an `is not None` test on it is reliable.
            index = child.get('index')
            index = int(index) if index else None

            _fallback_chain.append(FontRecord(
                name,
                frozenset(scripts),
                variant,
                weight,
                style,
                (font_file, index)))

            if name:  # non-empty names are used for default LGC fonts
                map_scripts = {'Latn', 'Grek', 'Cyrl'}
            else:
                map_scripts = scripts
            for script in map_scripts:
                _script_to_font_map[script].add((font_file, index))
2560e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
2570e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def check_emoji_coverage(all_emoji, equivalent_emoji):
    """Run the emoji coverage checks against the emoji font."""
    check_emoji_font_coverage(get_emoji_font(), all_emoji, equivalent_emoji)
261f874a1949a7516596a0c2f5829e140dc6f69c326Doug Felt
262f874a1949a7516596a0c2f5829e140dc6f69c326Doug Felt
def get_emoji_font():
    """Return the font of the single 'Zsye' record in the fallback chain.

    Asserts that exactly one record in _fallback_chain lists the 'Zsye'
    script.
    """
    emoji_fonts = []
    for record in _fallback_chain:
        if 'Zsye' in record.scripts:
            emoji_fonts.append(record.font)
    count = len(emoji_fonts)
    assert count == 1, 'There are %d emoji fonts.' % count
    return emoji_fonts[0]
269f874a1949a7516596a0c2f5829e140dc6f69c326Doug Felt
270fa1facc0fd3d04fbc442e23dd8e09f343c8932fcRoozbeh Pournader
def check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji):
    """Check that the emoji font covers exactly the expected sequences.

    Four properties are asserted, in this order:
      1. every member of `all_emoji` is in the font's emoji map;
      2. every key of the emoji map, apart from U+0000, U+000D and U+0020,
         is a member of `all_emoji`;
      3. for each (first, second) item of `equivalent_emoji`, both map to
         the same glyph;
      4. sequences sharing a glyph all reduce to one sequence when followed
         through `equivalent_emoji`.

    Args:
        emoji_font: (file name, index) pair of the emoji font.
        all_emoji: collection of characters/sequences the font must support.
        equivalent_emoji: dict mapping a sequence to its equivalent sequence.
    """
    coverage = get_emoji_map(emoji_font)
    for sequence in all_emoji:
        assert sequence in coverage, (
            '%s is not supported in the emoji font.' % printable(sequence))

    # The font needs to support a few extra characters, which is OK
    allowed_extras = {0x0000, 0x000D, 0x0020}
    for sequence in coverage:
        if sequence in allowed_extras:
            continue
        assert sequence in all_emoji, (
            'Emoji font should not support %s.' % printable(sequence))

    for first, second in sorted(equivalent_emoji.items()):
        assert coverage[first] == coverage[second], (
            '%s and %s should map to the same glyph.' % (
                printable(first),
                printable(second)))

    # Group the sequences by glyph in a single pass instead of rescanning
    # the whole map once per glyph.
    sequences_by_glyph = collections.defaultdict(list)
    for seq, glyph in coverage.items():
        sequences_by_glyph[glyph].append(seq)

    for glyph, maps_to_glyph in sequences_by_glyph.items():
        if len(maps_to_glyph) > 1:
            # There are more than one sequences mapping to the same glyph. We
            # need to make sure they were expected to be equivalent.
            equivalent_seqs = set()
            for seq in maps_to_glyph:
                equivalent_seq = seq
                while equivalent_seq in equivalent_emoji:
                    equivalent_seq = equivalent_emoji[equivalent_seq]
                equivalent_seqs.add(equivalent_seq)
            assert len(equivalent_seqs) == 1, (
                'The sequences %s should not result in the same glyph %s' % (
                    printable(equivalent_seqs),
                    glyph))
3053b3c78e6ba90c58bc8a4cd4409cfc5bc854ddc3bRoozbeh Pournader
3065dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
def check_emoji_defaults(default_emoji):
    """Check how non-emoji fonts treat emoji characters.

    Walks _fallback_chain and asserts that no checked font contains a
    character from `default_emoji`.  It also asserts that every character
    with the 'Emoji' property that is not in `default_emoji` appears in some
    checked font placed before the emoji font, except for characters listed
    under age '7.0'.
    """
    missing_text_chars = _emoji_properties['Emoji'] - default_emoji
    past_emoji_font = False
    for record in _fallback_chain:
        scripts = record.scripts
        if 'Zsye' in scripts:
            # No need to check the emoji font
            past_emoji_font = True
            continue

        if past_emoji_font:
            # For later fonts, we only check them if they have a script
            # defined, since the defined script may get them to a higher
            # score even if they appear after the emoji font. However,
            # we should skip checking the text symbols font, since
            # symbol fonts should be able to override the emoji display
            # style when 'Zsym' is explicitly specified by the user.
            if not scripts or 'Zsym' in scripts:
                continue

        # Check default emoji-style characters
        assert_font_supports_none_of_chars(record.font, sorted(default_emoji))

        # Mark default text-style characters appearing in fonts above the
        # emoji font as seen
        if not past_emoji_font:
            missing_text_chars -= set(get_best_cmap(record.font))

    # Noto does not have monochrome glyphs for Unicode 7.0 wingdings and
    # webdings yet.
    missing_text_chars -= _chars_by_age['7.0']
    assert not missing_text_chars, (
        'Text style version of some emoji characters are missing: ' +
            repr(missing_text_chars))
3387b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader
3397b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader
def parse_unicode_datafile(file_path, reverse=False):
    """Parse a semicolon-separated Unicode data file.

    Every line is stripped of its '#' comment (if any) and surrounding
    whitespace; empty lines are skipped. The remaining text is read as
    'chars ; prop [; ...]' and only the first two fields are used. The chars
    field may be:
      * a single hexadecimal code point, stored as an int;
      * an inclusive 'start..end' range, expanded to one int per code point;
      * a space-separated sequence of code points, stored as one tuple.

    Args:
        file_path: Path of the data file to read.
        reverse: If true, return a dictionary that maps the property values
            to sets of characters, useful for some binary properties.
            Otherwise, return a dictionary that maps characters to the
            property values, assuming there's only one property in the file.

    Returns:
        A collections.defaultdict(set) when reverse is true, a plain dict
        otherwise.

    Raises:
        AssertionError: in non-reverse mode, if the same character or
            sequence is listed more than once.
        ValueError: if a line has fewer than two fields or a code point is
            not valid hexadecimal.
    """
    output_dict = collections.defaultdict(set) if reverse else {}
    with open(file_path) as datafile:
        for line in datafile:
            # Drop the trailing comment, if any, then surrounding whitespace.
            line = line.partition('#')[0].strip()
            if not line:
                continue

            chars, prop = [field.strip() for field in line.split(';')[:2]]

            if ' ' in chars:  # character sequence
                # split() without arguments tolerates repeated spaces.
                additions = [tuple(int(ch, 16) for ch in chars.split())]
            elif '..' in chars:  # character range
                char_start, char_end = chars.split('..')
                # range() rather than xrange() so that the parser also runs
                # on Python 3; the data file ranges are inclusive.
                additions = range(int(char_start, 16), int(char_end, 16) + 1)
            else:  # single character
                additions = [int(chars, 16)]

            if reverse:
                output_dict[prop].update(additions)
            else:
                for addition in additions:
                    assert addition not in output_dict
                    output_dict[addition] = prop
    return output_dict
3787b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader
3797b822e5fc155a04fa808b1103da7663bd6dd7ba4Roozbeh Pournader
def parse_standardized_variants(file_path):
    """Collect text-style and emoji-style variation sequences from a file.

    Each non-empty line (after removing any '#' comment) must consist of
    exactly three ';'-separated fields. The first field holds at least two
    space-separated hexadecimal code points, of which the first two are used
    as (base, variation selector); the second field is the description.
    Lines whose description is neither 'text style' nor 'emoji style' are
    parsed but not recorded.

    Args:
        file_path: Path of the StandardizedVariants-style file to read.

    Returns:
        A tuple (text_set, emoji_set) of sets of (base, vs) int pairs.
    """
    pairs_by_style = {'text style': set(), 'emoji style': set()}
    with open(file_path) as variants_file:
        for raw_line in variants_file:
            content = raw_line.split('#', 1)[0].strip()
            if not content:
                continue
            code_points, style, _ = content.split(';')
            code_points = code_points.strip().split(' ')
            pair = (int(code_points[0], 16), int(code_points[1], 16))
            # Descriptions other than the two styles have no bucket.
            bucket = pairs_by_style.get(style.strip())
            if bucket is not None:
                bucket.add(pair)
    return pairs_by_style['text style'], pairs_by_style['emoji style']
4005dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
4015dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
def parse_ucd(ucd_path):
    """Load the Unicode data files under ucd_path into module globals.

    Sets _emoji_properties and _chars_by_age (both parsed in reverse mode,
    i.e. property value -> set of characters), _text_variation_sequences and
    _emoji_variation_sequences (from StandardizedVariants.txt), and
    _emoji_sequences and _emoji_zwj_sequences (sequence -> property value).

    Args:
        ucd_path: Directory containing the Unicode data files.
    """
    global _emoji_properties, _chars_by_age
    global _text_variation_sequences, _emoji_variation_sequences
    global _emoji_sequences, _emoji_zwj_sequences

    def ucd_file(file_name):
        # Full path of a data file inside the UCD directory.
        return path.join(ucd_path, file_name)

    _emoji_properties = parse_unicode_datafile(
        ucd_file('emoji-data.txt'), reverse=True)
    _chars_by_age = parse_unicode_datafile(
        ucd_file('DerivedAge.txt'), reverse=True)
    (_text_variation_sequences,
     _emoji_variation_sequences) = parse_standardized_variants(
         ucd_file('StandardizedVariants.txt'))
    _emoji_sequences = parse_unicode_datafile(
        ucd_file('emoji-sequences.txt'))
    _emoji_zwj_sequences = parse_unicode_datafile(
        ucd_file('emoji-zwj-sequences.txt'))
4175dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
4185dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
def flag_sequence(territory_code):
    """Return the flag sequence for a territory code as a tuple of ints.

    Each letter of territory_code is shifted so that 'A' lands on 0x1F1E6
    (REGIONAL INDICATOR SYMBOL LETTER A), 'B' on 0x1F1E7, and so on.

    Args:
        territory_code: An iterable of single letters, e.g. the string 'US'
            or a tuple such as ('U', 'S').
    """
    shift = 0x1F1E6 - ord('A')
    return tuple(ord(letter) + shift for letter in territory_code)
4215dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
4225dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
# Flag sequences of territories listed as unsupported; compute_expected_emoji()
# always includes them among the flags that resolve to the flag tofu.
UNSUPPORTED_FLAGS = frozenset(
    flag_sequence(territory) for territory in (
        'BL', 'BQ', 'DG', 'EA', 'EH', 'FK', 'GF', 'GP', 'GS', 'MF',
        'MQ', 'NC', 'PM', 'RE', 'TF', 'UN', 'WF', 'XK', 'YT',
    ))
4325dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
# Maps the flag sequence of a territory to the flag sequence of another
# territory it is treated as equivalent to (alias -> canonical).
EQUIVALENT_FLAGS = {
    flag_sequence(alias): flag_sequence(canonical)
    for alias, canonical in (
        ('BV', 'NO'),
        ('CP', 'FR'),
        ('HM', 'AU'),
        ('SJ', 'NO'),
        ('UM', 'US'),
    )
}
4405dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
# U+20E3 COMBINING ENCLOSING KEYCAP; LEGACY_ANDROID_EMOJI pairs it with '#' and
# the ASCII digits to spell out keycap sequences.
COMBINING_KEYCAP = 0x20E3
4425dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
# Characters that Android defaults to emoji style, different from the
# recommendations in UTR #51.
ANDROID_DEFAULT_EMOJI = frozenset([
    0x2600,  # U+2600 BLACK SUN WITH RAYS
    0x2601,  # U+2601 CLOUD
    0x260E,  # U+260E BLACK TELEPHONE
    0x261D,  # U+261D WHITE UP POINTING INDEX
    0x263A,  # U+263A WHITE SMILING FACE
    0x2660,  # U+2660 BLACK SPADE SUIT
    0x2663,  # U+2663 BLACK CLUB SUIT
    0x2665,  # U+2665 BLACK HEART SUIT
    0x2666,  # U+2666 BLACK DIAMOND SUIT
    0x270C,  # U+270C VICTORY HAND
    0x2744,  # U+2744 SNOWFLAKE
    0x2764,  # U+2764 HEAVY BLACK HEART
])
45810ea8f7431f3748be4b788100f1cab5f703284acRoozbeh Pournader
# Maps legacy Android emoji code points to the sequences that
# compute_expected_emoji() registers as their equivalents.
# Flags: ten consecutive code points starting at 0xFE4E5.
LEGACY_ANDROID_EMOJI = {
    0xFE4E5 + index: flag_sequence(territory)
    for index, territory in enumerate(
        ('JP', 'US', 'FR', 'DE', 'IT', 'GB', 'ES', 'RU', 'CN', 'KR'))
}
# Keycap for '#'; note that 0xFE82D is not part of the table.
LEGACY_ANDROID_EMOJI.update({
    0xFE82C: (ord('#'), COMBINING_KEYCAP),
})
# Keycaps for the digits: 0xFE82E is '1' ... 0xFE836 is '9', 0xFE837 is '0'.
LEGACY_ANDROID_EMOJI.update({
    0xFE82E + index: (ord(digit), COMBINING_KEYCAP)
    for index, digit in enumerate('1234567890')
})
4825dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
# Maps a ZWJ (0x200D) sequence to the single character that
# compute_expected_emoji() registers as its equivalent.
ZWJ_IDENTICALS = dict([
    # KISS
    ((0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468), 0x1F48F),
    # COUPLE WITH HEART
    ((0x1F469, 0x200D, 0x2764, 0x200D, 0x1F468), 0x1F491),
    # FAMILY
    ((0x1F468, 0x200D, 0x1F469, 0x200D, 0x1F466), 0x1F46A),
])
4915dde087811c255509a72aa9a51c27b40bf0cbf2cRoozbeh Pournader
492f874a1949a7516596a0c2f5829e140dc6f69c326Doug Felt
def is_fitzpatrick_modifier(cp):
    """Return True if cp lies in 0x1F3FB..0x1F3FF (inclusive)."""
    return 0x1F3FB <= cp <= 0x1F3FF


def reverse_emoji(seq):
    """Return seq reversed, with each Fitzpatrick modifier kept after its base.

    A plain reversal would put every modifier in front of the emoji it
    modifies; each such pair is swapped back so the modifier still follows
    its emoji.

    Args:
        seq: A sequence of integer code points.

    Returns:
        A tuple of code points.
    """
    rev = list(reversed(seq))
    i = 1
    while i < len(rev):
        if is_fitzpatrick_modifier(rev[i-1]):
            rev[i], rev[i-1] = rev[i-1], rev[i]
            # The modifier now sits at index i. Step over it so that it is
            # not examined (and swapped further towards the end) again.
            i += 2
        else:
            i += 1
    return tuple(rev)
505f874a1949a7516596a0c2f5829e140dc6f69c326Doug Felt
506f874a1949a7516596a0c2f5829e140dc6f69c326Doug Felt
def compute_expected_emoji():
    """Compute the emoji sets and equivalences used by the emoji checks.

    Reads the module globals filled in by parse_ucd() (_emoji_properties,
    _emoji_variation_sequences, _emoji_sequences, _emoji_zwj_sequences) and
    the module-level constants UNSUPPORTED_FLAGS, EQUIVALENT_FLAGS,
    LEGACY_ANDROID_EMOJI, ZWJ_IDENTICALS and ANDROID_DEFAULT_EMOJI.

    Returns:
        A tuple (all_emoji, default_emoji, equivalent_emoji):
          * all_emoji: the union of the 'Emoji' property characters, every
            collected sequence, every code point appearing in an emoji or
            ZWJ sequence, and the legacy Android code points.
          * default_emoji: the union of the 'Emoji_Presentation' property
            characters, ANDROID_DEFAULT_EMOJI, every collected sequence and
            the legacy Android code points.
          * equivalent_emoji: a dict mapping a sequence or code point to the
            sequence or code point it is treated as equivalent to.
    """
    equivalent_emoji = {}
    sequence_pieces = set()
    all_sequences = set(_emoji_variation_sequences)

    # add zwj sequences not in the current emoji-zwj-sequences.txt; the copy
    # keeps the parsed global table untouched
    adjusted_emoji_zwj_sequences = dict(_emoji_zwj_sequences)
    # single parent families
    additional_emoji_zwj = (
        (0x1F468, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467),
        (0x1F468, 0x200D, 0x1F466, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467, 0x200D, 0x1F467),
        (0x1F469, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467),
        (0x1F469, 0x200D, 0x1F466, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467, 0x200D, 0x1F467),
    )
    # sequences formed from man and woman and optional fitzpatrick modifier
    modified_extensions = (
        0x2696,
        0x2708,
        0x1F3A8,
        0x1F680,
        0x1F692,
    )
    for seq in additional_emoji_zwj:
        adjusted_emoji_zwj_sequences[seq] = 'Emoji_ZWJ_Sequence'
    for ext in modified_extensions:
        for base in (0x1F468, 0x1F469):
            seq = (base, 0x200D, ext)
            adjusted_emoji_zwj_sequences[seq] = 'Emoji_ZWJ_Sequence'
            # 0x1F3FB..0x1F3FF: the same range is_fitzpatrick_modifier()
            # accepts.
            for modifier in range(0x1F3FB, 0x1F3FF + 1):
                seq = (base, modifier, 0x200D, ext)
                adjusted_emoji_zwj_sequences[seq] = 'Emoji_ZWJ_Sequence'

    for sequence in _emoji_sequences:
        # Sequences are recorded without the emoji variation selector.
        sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)

    for sequence in adjusted_emoji_zwj_sequences:
        sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Add reverse of all emoji ZWJ sequences, which are added to the fonts
        # as a workaround to get the sequences work in RTL text.
        reversed_seq = reverse_emoji(sequence)
        all_sequences.add(reversed_seq)
        equivalent_emoji[reversed_seq] = sequence

    # Add all two-letter flag sequences, as even the unsupported ones should
    # resolve to a flag tofu.
    all_letters = [chr(code) for code in range(ord('A'), ord('Z')+1)]
    all_two_letter_codes = itertools.product(all_letters, repeat=2)
    all_flags = {flag_sequence(code) for code in all_two_letter_codes}
    all_sequences.update(all_flags)
    tofu_flags = UNSUPPORTED_FLAGS | (all_flags - set(_emoji_sequences))

    legacy_code_points = set(LEGACY_ANDROID_EMOJI)
    all_emoji = (
        _emoji_properties['Emoji'] |
        all_sequences |
        sequence_pieces |
        legacy_code_points)
    default_emoji = (
        _emoji_properties['Emoji_Presentation'] |
        ANDROID_DEFAULT_EMOJI |
        all_sequences |
        legacy_code_points)

    # Every tofu flag is declared equivalent to the smallest one. tofu_flags
    # is never empty because it contains UNSUPPORTED_FLAGS.
    first_tofu_flag = min(tofu_flags)
    for flag in tofu_flags:
        if flag != first_tofu_flag:
            equivalent_emoji[flag] = first_tofu_flag
    # Later updates deliberately override earlier entries for the same key.
    equivalent_emoji.update(EQUIVALENT_FLAGS)
    equivalent_emoji.update(LEGACY_ANDROID_EMOJI)
    equivalent_emoji.update(ZWJ_IDENTICALS)
    for seq in _emoji_variation_sequences:
        # A (base, variation selector) pair is equivalent to its base.
        equivalent_emoji[seq] = seq[0]

    return all_emoji, default_emoji, equivalent_emoji
592fa1facc0fd3d04fbc442e23dd8e09f343c8932fcRoozbeh Pournader
593fa1facc0fd3d04fbc442e23dd8e09f343c8932fcRoozbeh Pournader
def check_vertical_metrics():
    """Assert that selected families in the fallback chain have fixed metrics.

    For every record in _fallback_chain:
      * records named 'sans-serif' or 'sans-serif-condensed' must have
        head.yMax == 2163 and head.yMin == -555;
      * those two plus 'serif' and 'monospace' must have
        hhea.ascent == 1900 and hhea.descent == -500.

    Raises:
        AssertionError: naming the offending font file, on the first mismatch.
    """
    head_checked_names = ('sans-serif', 'sans-serif-condensed')
    hhea_checked_names = head_checked_names + ('serif', 'monospace')

    for record in _fallback_chain:
        family = record.name

        if family in head_checked_names:
            head = open_font(record.font)['head']
            assert head.yMax == 2163 and head.yMin == -555, (
                'yMax and yMin of %s do not match expected values.' % (
                    record.font,))

        if family in hhea_checked_names:
            hhea = open_font(record.font)['hhea']
            assert hhea.ascent == 1900 and hhea.descent == -500, (
                'ascent and descent of %s do not match expected values.' % (
                    record.font,))
605bac1aec6354cc1766cf4ff03578d32d0fa623cb0Roozbeh Pournader
606bac1aec6354cc1766cf4ff03578d32d0fa623cb0Roozbeh Pournader
def main():
    """Run the font chain checks for the target output directory.

    Command line arguments:
        sys.argv[1]: target output directory; fonts are looked up under
            'fonts', the font configuration at 'etc/fonts.xml' and the
            hyphenation data under 'usr/hyphen-data'.
        sys.argv[2]: the emoji checks run only when this is exactly 'true'.
        sys.argv[3]: directory of the Unicode data files; read only when the
            emoji checks run.
    """
    global _fonts_dir
    output_root = sys.argv[1]
    _fonts_dir = path.join(output_root, 'fonts')

    parse_fonts_xml(path.join(output_root, 'etc', 'fonts.xml'))

    check_vertical_metrics()

    check_hyphens(path.join(output_root, 'usr', 'hyphen-data'))

    # The emoji checks are optional; anything but 'true' skips them.
    if sys.argv[2] != 'true':
        return

    parse_ucd(sys.argv[3])
    all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
    check_emoji_coverage(all_emoji, equivalent_emoji)
    check_emoji_defaults(default_emoji)
627fa1facc0fd3d04fbc442e23dd8e09f343c8932fcRoozbeh Pournader
6280e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
# Run the checks only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
631