#!/usr/bin/env python
# fontchain_lint.py revision 6c4f9e0d6f268693663fd5696046172e8c626dd5
"""Lint checks for the fonts listed in fonts.xml.

Parses a fonts.xml file into a fallback chain and a script-to-font map,
then verifies that every font used for a script that has hyphenation
data contains a hyphen character.
"""

import collections
import glob
from os import path
import sys
from xml.etree import ElementTree

from fontTools import ttLib

# Script assumed for a language code when the code itself does not carry
# an explicit four-letter script subtag (see lang_to_script()).
LANG_TO_SCRIPT = {
    'as': 'Beng',
    'bn': 'Beng',
    'cy': 'Latn',
    'da': 'Latn',
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'et': 'Latn',
    'eu': 'Latn',
    'fr': 'Latn',
    'ga': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn',
    'hu': 'Latn',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn',
    'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn',
    'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
}


def lang_to_script(lang_code):
    """Return the four-letter script code for a hyphen-separated language code.

    The code is lower-cased and looked up in LANG_TO_SCRIPT.  While there is
    no match, the last hyphen-separated subtag is examined: if it is four
    alphabetic characters it is taken to be the script and returned in title
    case; otherwise it is dropped and the lookup is retried.

    Raises:
        AssertionError: if no subtags are left and the language is unknown.
    """
    lang = lang_code.lower()
    while lang not in LANG_TO_SCRIPT:
        hyphen_idx = lang.rfind('-')
        assert hyphen_idx != -1, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)
        assumed_script = lang[hyphen_idx+1:]
        if len(assumed_script) == 4 and assumed_script.isalpha():
            # This is actually the script
            return assumed_script.title()
        lang = lang[:hyphen_idx]
    return LANG_TO_SCRIPT[lang]


def get_best_cmap(font):
    """Return the character map dictionary of the best cmap subtable of a font.

    Args:
        font: a (font_file, index) tuple.  font_file is resolved relative to
            the module-level _fonts_dir; index is passed to fontTools as
            fontNumber, or is None when no font number should be given.

    Returns:
        The ``cmap`` attribute of the (format 12, platform 3, encoding 10)
        subtable when the font has one, otherwise that of the
        (format 4, platform 3, encoding 1) subtable.

    Raises:
        AssertionError: if the font has more than one subtable of either
            kind, or has neither of them.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is not None:
        ttfont = ttLib.TTFont(font_path, fontNumber=index)
    else:
        ttfont = ttLib.TTFont(font_path)
    try:
        all_unicode_cmap = None
        bmp_cmap = None
        for cmap in ttfont['cmap'].tables:
            specifier = (cmap.format, cmap.platformID, cmap.platEncID)
            if specifier == (4, 3, 1):
                assert bmp_cmap is None, (
                    'More than one BMP cmap in %s' % (font, ))
                bmp_cmap = cmap
            elif specifier == (12, 3, 10):
                assert all_unicode_cmap is None, (
                    'More than one UCS-4 cmap in %s' % (font, ))
                all_unicode_cmap = cmap

        if all_unicode_cmap is not None:
            best_cmap = all_unicode_cmap
        else:
            best_cmap = bmp_cmap
        assert best_cmap is not None, (
            'No BMP or UCS-4 cmap in %s' % (font, ))
        # Read the mapping while the font is still open, before closing it.
        return best_cmap.cmap
    finally:
        # Release the underlying font file instead of leaking the handle.
        ttfont.close()


def assert_font_supports_any_of_chars(font, chars):
    """Exit the program unless the font maps at least one of the characters.

    Args:
        font: a (font_file, index) tuple as accepted by get_best_cmap().
        chars: an iterable of code points to look up in the font's cmap.
    """
    best_cmap = get_best_cmap(font)
    if any(char in best_cmap for char in chars):
        return
    sys.exit('None of characters in %s were found in %s' % (chars, font))


def check_hyphens(hyphens_dir):
    """Check that fonts for scripts with hyphenation data contain a hyphen.

    Every ``hyph-<lang>.hyb`` file in hyphens_dir contributes the script of
    its language; each font registered for those scripts in the module-level
    _script_to_font_map must support U+002D or U+2010.

    Raises:
        AssertionError: if a .hyb file is not named ``hyph-*`` or a script
            has no fonts registered.
    """
    # Find all the scripts that need automatic hyphenation
    scripts = set()
    for hyb_file in glob.iglob(path.join(hyphens_dir, '*.hyb')):
        hyb_file = path.basename(hyb_file)
        assert hyb_file.startswith('hyph-'), (
            'Unknown hyphenation file %s' % hyb_file)
        lang_code = hyb_file[hyb_file.index('-')+1:hyb_file.index('.')]
        scripts.add(lang_to_script(lang_code))

    HYPHENS = {0x002D, 0x2010}
    # Sorted so that the first failure reported is the same on every run.
    for script in sorted(scripts):
        fonts = _script_to_font_map[script]
        assert fonts, 'No fonts found for the "%s" script' % script
        for font in fonts:
            assert_font_supports_any_of_chars(font, HYPHENS)


def parse_fonts_xml(fonts_xml_path):
    """Parse fonts.xml into the module-level font tables.

    Rebuilds two globals from the <family> elements of the file:

    * _fallback_chain: a list with one
      (name, scripts, variant, weight, style, (font_file, index)) tuple per
      <font> element, in document order.
    * _script_to_font_map: maps a script code to the set of
      (font_file, index) tuples serving it.  Fonts of named families are
      registered for Latn, Grek and Cyrl; fonts of unnamed families are
      registered for the scripts derived from the family's ``lang``.

    Raises:
        AssertionError: if a family or font element has unexpected
            attributes, children or values.
    """
    global _script_to_font_map, _fallback_chain
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chain = []
    tree = ElementTree.parse(fonts_xml_path)
    for family in tree.findall('family'):
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        if name:
            assert variant is None, (
                'No variant expected for LGC font %s.' % name)
            assert langs is None, (
                'No language expected for LGC fonts %s.' % name)
        else:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)

        if langs:
            langs = langs.split()
            scripts = {lang_to_script(lang) for lang in langs}
        else:
            scripts = set()

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)
            # Surrounding whitespace in the element text is not part of the
            # file name; an empty element cannot name a font at all.
            assert child.text and child.text.strip(), (
                'Missing font file name in a <font> element')
            font_file = child.text.strip()

            weight_attr = child.get('weight')
            assert weight_attr is not None, (
                'Missing weight for font %s' % font_file)
            weight = int(weight_attr)
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            # A missing or empty index means "no font number"; normalise
            # both to None so get_best_cmap() never sees an empty string.
            index = child.get('index')
            index = int(index) if index else None

            _fallback_chain.append((
                name,
                frozenset(scripts),
                variant,
                weight,
                style,
                (font_file, index)))

            if name:  # non-empty names are used for default LGC fonts
                map_scripts = {'Latn', 'Grek', 'Cyrl'}
            else:
                map_scripts = scripts
            for script in map_scripts:
                _script_to_font_map[script].add((font_file, index))


def main():
    """Run the lint checks against the directory given on the command line.

    The first command-line argument is the target output directory.  Fonts
    are looked up in its ``fonts`` subdirectory (stored in the module-level
    _fonts_dir), ``etc/fonts.xml`` is passed to parse_fonts_xml() and
    ``usr/hyphen-data`` is passed to check_hyphens().

    Exits with a usage message when the argument is missing.
    """
    global _fonts_dir
    if len(sys.argv) < 2:
        # Fail with a readable message rather than a bare IndexError.
        sys.exit('Usage: fontchain_lint.py <target_out>')
    target_out = sys.argv[1]
    _fonts_dir = path.join(target_out, 'fonts')

    fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')
    parse_fonts_xml(fonts_xml_path)

    hyphens_dir = path.join(target_out, 'usr', 'hyphen-data')
    check_hyphens(hyphens_dir)


if __name__ == '__main__':
    main()