fontchain_lint.py revision 6c4f9e0d6f268693663fd5696046172e8c626dd5
10e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader#!/usr/bin/env python
20e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
30e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderimport collections
40e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderimport glob
50e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderfrom os import path
60e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderimport sys
70e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderfrom xml.etree import ElementTree
80e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
90e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournaderfrom fontTools import ttLib
100e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
# Default script for languages whose tag does not spell one out.
#
# Keys are lowercase language subtags; values are four-letter script codes
# (they look like ISO 15924 codes, e.g. 'Latn'). lang_to_script() consults
# this table after lowercasing the tag, so keys must stay lowercase. Tags that
# carry an explicit four-letter script subtag are resolved by lang_to_script()
# without needing an entry here.
LANG_TO_SCRIPT = {
    'as': 'Beng',
    'bn': 'Beng',
    'cy': 'Latn',
    'da': 'Latn',
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'et': 'Latn',
    'eu': 'Latn',
    'fr': 'Latn',
    'ga': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn',
    'hu': 'Latn',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn',
    'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn',
    'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
}
440e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def lang_to_script(lang_code):
    """Return the four-letter script code for a hyphen-separated language tag.

    The tag is lowercased and looked up in LANG_TO_SCRIPT. While there is no
    match, the last hyphen-separated subtag is examined: if it is exactly four
    alphabetic characters it is taken to be the script and returned
    title-cased (e.g. 'latn' -> 'Latn'); otherwise it is dropped and the
    shorter tag is tried again.

    Args:
        lang_code: language tag such as 'en', 'en-US' or 'sr-Latn'.

    Returns:
        The script code as a string.

    Raises:
        AssertionError: if no script can be determined for the tag.
    """
    lang = lang_code.lower()
    while lang not in LANG_TO_SCRIPT:
        hyphen_idx = lang.rfind('-')
        if hyphen_idx == -1:
            # Raised explicitly instead of via `assert`: with assertions
            # disabled (python -O) the check would vanish and this loop would
            # never terminate for an unknown language.
            raise AssertionError(
                'We do not know what script the "%s" language is written in.'
                % lang_code)
        assumed_script = lang[hyphen_idx+1:]
        if len(assumed_script) == 4 and assumed_script.isalpha():
            # This is actually the script
            return assumed_script.title()
        lang = lang[:hyphen_idx]
    return LANG_TO_SCRIPT[lang]
580e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
590e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def get_best_cmap(font):
    """Return the character map of the most capable Unicode cmap subtable.

    Two kinds of subtable are considered, identified by their
    (format, platformID, platEncID) triple: (12, 3, 10), the UCS-4 cmap, and
    (4, 3, 1), the BMP cmap. The UCS-4 one is preferred when present.

    Args:
        font: a (font_file, index) tuple. font_file is resolved relative to
            the module-level _fonts_dir; index selects a font inside a
            collection and may be None for a single-font file.

    Returns:
        The `cmap` attribute of the chosen subtable (used elsewhere in this
        file for membership tests on integer code points).

    Raises:
        AssertionError: if the font has more than one subtable of either
            kind, or has neither kind.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is not None:
        ttfont = ttLib.TTFont(font_path, fontNumber=index)
    else:
        ttfont = ttLib.TTFont(font_path)
    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    # Without this check a font lacking both subtables would fail below with
    # an opaque "'NoneType' object has no attribute 'cmap'".
    assert all_unicode_cmap is not None or bmp_cmap is not None, (
        'No BMP or UCS-4 cmap found in %s' % (font, ))

    best = all_unicode_cmap if all_unicode_cmap is not None else bmp_cmap
    return best.cmap
800e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
810e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def assert_font_supports_any_of_chars(font, chars):
    """Abort the program unless `font` maps at least one of `chars`.

    Args:
        font: a (font_file, index) tuple, as accepted by get_best_cmap().
        chars: iterable of code points to look for in the font's best cmap.

    Exits via sys.exit() with an explanatory message when none of the code
    points is present; otherwise returns None.
    """
    cmap = get_best_cmap(font)
    if any(code_point in cmap for code_point in chars):
        return
    sys.exit('None of characters in %s were found in %s' % (chars, font))
880e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
890e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def check_hyphens(hyphens_dir):
    """Check that fonts for every hyphenated script contain a hyphen.

    Each 'hyph-<lang>.hyb' file found directly in `hyphens_dir` contributes
    the script of <lang>. Every font registered for those scripts in the
    module-level _script_to_font_map must then support U+002D or U+2010.

    Args:
        hyphens_dir: directory holding the '*.hyb' hyphenation files.

    Raises:
        AssertionError: on a '.hyb' file not named 'hyph-*', or when a script
            has no fonts registered.
    """
    prefix = 'hyph-'

    # Collect the scripts that need automatic hyphenation
    needed_scripts = set()
    pattern = path.join(hyphens_dir, '*.hyb')
    for hyb_path in glob.iglob(pattern):
        file_name = path.basename(hyb_path)
        assert file_name.startswith(prefix), (
            'Unknown hyphenation file %s' % file_name)
        # The language code sits between the prefix and the first dot
        lang_code = file_name[len(prefix):].partition('.')[0]
        needed_scripts.add(lang_to_script(lang_code))

    hyphen_chars = {0x002D, 0x2010}
    for script in needed_scripts:
        script_fonts = _script_to_font_map[script]
        assert script_fonts, 'No fonts found for the "%s" script' % script
        for font in script_fonts:
            assert_font_supports_any_of_chars(font, hyphen_chars)
1060e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
1070e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def parse_fonts_xml(fonts_xml_path):
    """Parse fonts.xml and (re)build the module-level font tables.

    Populates two globals, replacing any previous contents:

    * _fallback_chain: list with one tuple per <font> element, in document
      order: (family name, frozenset of scripts, variant, weight, style,
      (font_file, index)).
    * _script_to_font_map: defaultdict mapping a script code to the set of
      (font_file, index) tuples serving it. Named families are registered
      for 'Latn', 'Grek' and 'Cyrl'; unnamed families for the scripts
      derived from their `lang` attribute.

    Args:
        fonts_xml_path: file name (or file object) handed to
            ElementTree.parse().

    Raises:
        AssertionError: when a family or font violates the checks below
            (unexpected variant/lang, unknown child tag, empty font name,
            weight not a multiple of 100, unknown style).
    """
    global _script_to_font_map, _fallback_chain
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chain = []
    tree = ElementTree.parse(fonts_xml_path)
    for family in tree.findall('family'):
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        if name:
            assert variant is None, (
                'No variant expected for LGC font %s.' % name)
            assert langs is None, (
                'No language expected for LGC fonts %s.' % name)
        else:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)

        if langs:
            scripts = {lang_to_script(lang) for lang in langs.split()}
        else:
            scripts = set()

        # The target scripts depend only on the family, so compute them once
        # rather than for every <font> child.
        if name:  # non-empty names are used for default LGC fonts
            map_scripts = {'Latn', 'Grek', 'Cyrl'}
        else:
            map_scripts = scripts

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)

            # Element text is None for an empty element and may carry
            # whitespace/newlines in pretty-printed XML; neither belongs in a
            # file name.
            font_file = (child.text or '').strip()
            assert font_file, (
                'Empty <font> element in family %s' % name)

            weight = int(child.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            # A missing or empty index attribute both mean "no index".
            index_attr = child.get('index')
            index = int(index_attr) if index_attr else None

            font = (font_file, index)
            _fallback_chain.append((
                name,
                frozenset(scripts),
                variant,
                weight,
                style,
                font))

            for script in map_scripts:
                _script_to_font_map[script].add(font)
1620e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
1630e969e2c0ba9ad863c7fcfc3973a16b1b599e50aRoozbeh Pournader
def main():
    """Lint the fonts of a build output directory.

    Expects the output directory as the first command-line argument. Reads
    'etc/fonts.xml' below it, then checks the hyphenation data in
    'usr/hyphen-data' against the fonts in 'fonts'. Sets the module-level
    _fonts_dir used when font files are opened.

    Exits with a usage message when the argument is missing.
    """
    global _fonts_dir

    if len(sys.argv) < 2:
        # Fail with guidance instead of a bare IndexError.
        sys.exit('Usage: %s <target_out_dir>' % sys.argv[0])
    target_out = sys.argv[1]
    _fonts_dir = path.join(target_out, 'fonts')

    fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')
    parse_fonts_xml(fonts_xml_path)

    hyphens_dir = path.join(target_out, 'usr', 'hyphen-data')
    check_hyphens(hyphens_dir)


if __name__ == '__main__':
    main()
178