# fontchain_lint.py — revision 6e06ad055b35b197b3083728c6c5d311fb12e57a
#!/usr/bin/env python

"""Sanity checks for the Android system font configuration.

Given a target output directory, this script parses etc/fonts.xml and the
font files it references, then verifies:

  * every script that has hyphenation patterns is covered by some font,
  * the vertical metrics of the default LGC fonts match expected values,
  * (optionally) the emoji font covers exactly the expected set of
    characters and sequences, derived from the Unicode Character Database
    emoji data files.

Usage: fontchain_lint.py TARGET_OUT CHECK_EMOJI [UCD_PATH]
"""

import collections
import copy
import glob
import itertools
from os import path
import sys
from xml.etree import ElementTree

from fontTools import ttLib

# U+FE0F VARIATION SELECTOR-16: requests emoji presentation of the base char.
EMOJI_VS = 0xFE0F

# Maps a bare language code to the ISO 15924 script it is written in, for
# language tags that do not carry an explicit script subtag.
LANG_TO_SCRIPT = {
    'as': 'Beng',
    'bg': 'Cyrl',
    'bn': 'Beng',
    'cu': 'Cyrl',
    'cy': 'Latn',
    'da': 'Latn',
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'et': 'Latn',
    'eu': 'Latn',
    'fr': 'Latn',
    'ga': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn',
    'hu': 'Latn',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn',
    'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn',
    'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
}


def lang_to_script(lang_code):
    """Return the ISO 15924 script code for a BCP 47 language tag.

    Strips subtags from the right until either a four-letter script subtag
    is found or the remaining prefix is present in LANG_TO_SCRIPT.  Asserts
    if the script cannot be determined.
    """
    lang = lang_code.lower()
    while lang not in LANG_TO_SCRIPT:
        hyphen_idx = lang.rfind('-')
        assert hyphen_idx != -1, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)
        assumed_script = lang[hyphen_idx+1:]
        if len(assumed_script) == 4 and assumed_script.isalpha():
            # A four-letter alphabetic subtag is the script itself.
            return assumed_script.title()
        lang = lang[:hyphen_idx]
    return LANG_TO_SCRIPT[lang]


def printable(inp):
    """Format a code point, a sequence, or a set of sequences for messages."""
    if type(inp) is set:  # set of character sequences
        return '{' + ', '.join([printable(seq) for seq in inp]) + '}'
    if type(inp) is tuple:  # character sequence
        return '<' + (', '.join([printable(ch) for ch in inp])) + '>'
    else:  # single character
        return 'U+%04X' % inp


def open_font(font):
    """Open a (file name, collection index or None) pair as a TTFont."""
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is not None:
        return ttLib.TTFont(font_path, fontNumber=index)
    else:
        return ttLib.TTFont(font_path)


def get_best_cmap(font):
    """Return the font's preferred cmap dict (code point -> glyph name).

    Prefers the format 12 UCS-4 cmap; falls back to the format 4 BMP cmap.
    Asserts that at most one of each is present.
    """
    ttfont = open_font(font)
    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    return all_unicode_cmap.cmap if all_unicode_cmap else bmp_cmap.cmap


def get_variation_sequences_cmap(font):
    """Return the font's format 14 variation sequences cmap subtable."""
    ttfont = open_font(font)
    vs_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (14, 0, 5):
            assert vs_cmap is None, 'More than one VS cmap in %s' % (font, )
            vs_cmap = cmap
    return vs_cmap


def get_emoji_map(font):
    """Build a map of everything the emoji font supports to glyph names.

    Keys are single code points, (base, variation selector) pairs, and
    tuples of code points for ligated (GSUB type 4) sequences.
    """
    # Add normal characters
    emoji_map = copy.copy(get_best_cmap(font))
    reverse_cmap = {glyph: code for code, glyph in emoji_map.items()}

    # Add variation sequences
    vs_dict = get_variation_sequences_cmap(font).uvsDict
    for vs in vs_dict:
        for base, glyph in vs_dict[vs]:
            if glyph is None:
                # Default UVS entry: the sequence maps to the base glyph.
                emoji_map[(base, vs)] = emoji_map[base]
            else:
                emoji_map[(base, vs)] = glyph

    # Add GSUB rules
    ttfont = open_font(font)
    for lookup in ttfont['GSUB'].table.LookupList.Lookup:
        if lookup.LookupType != 4:
            # Other lookups are used in the emoji font for fallback.
            # We ignore them for now.
            continue
        for subtable in lookup.SubTable:
            ligatures = subtable.ligatures
            for first_glyph in ligatures:
                for ligature in ligatures[first_glyph]:
                    sequence = [first_glyph] + ligature.Component
                    sequence = [reverse_cmap[glyph] for glyph in sequence]
                    sequence = tuple(sequence)
                    # Make sure no starting subsequence of 'sequence' has been
                    # seen before.
                    for sub_len in range(2, len(sequence)+1):
                        subsequence = sequence[:sub_len]
                        assert subsequence not in emoji_map
                    emoji_map[sequence] = ligature.LigGlyph

    return emoji_map


def assert_font_supports_any_of_chars(font, chars):
    """Exit the script unless the font supports at least one of chars."""
    best_cmap = get_best_cmap(font)
    for char in chars:
        if char in best_cmap:
            return
    sys.exit('None of characters in %s were found in %s' % (chars, font))


def assert_font_supports_all_of_chars(font, chars):
    """Assert that the font's cmap contains every character in chars."""
    best_cmap = get_best_cmap(font)
    for char in chars:
        assert char in best_cmap, (
            'U+%04X was not found in %s' % (char, font))


def assert_font_supports_none_of_chars(font, chars):
    """Assert that the font's cmap contains no character in chars."""
    best_cmap = get_best_cmap(font)
    for char in chars:
        assert char not in best_cmap, (
            'U+%04X was found in %s' % (char, font))


def assert_font_supports_all_sequences(font, sequences):
    """Assert that every (base, vs) pair is a default UVS entry in the font."""
    vs_dict = get_variation_sequences_cmap(font).uvsDict
    for base, vs in sorted(sequences):
        assert vs in vs_dict and (base, None) in vs_dict[vs], (
            '<U+%04X, U+%04X> was not found in %s' % (base, vs, font))


def check_hyphens(hyphens_dir):
    """Check that every hyphenated script has a font able to show hyphens."""
    # Find all the scripts that need automatic hyphenation
    scripts = set()
    for hyb_file in glob.iglob(path.join(hyphens_dir, '*.hyb')):
        hyb_file = path.basename(hyb_file)
        assert hyb_file.startswith('hyph-'), (
            'Unknown hyphenation file %s' % hyb_file)
        lang_code = hyb_file[hyb_file.index('-')+1:hyb_file.index('.')]
        scripts.add(lang_to_script(lang_code))

    HYPHENS = {0x002D, 0x2010}  # HYPHEN-MINUS and HYPHEN
    for script in scripts:
        fonts = _script_to_font_map[script]
        assert fonts, 'No fonts found for the "%s" script' % script
        for font in fonts:
            assert_font_supports_any_of_chars(font, HYPHENS)


class FontRecord(object):
    """One <font> entry from fonts.xml.

    Attributes:
        name: family name for named (LGC) families, or None.
        scripts: frozenset of ISO 15924 script codes the family covers.
        variant: 'elegant', 'compact', or None.
        weight: font weight, a multiple of 100.
        style: 'normal' or 'italic'.
        font: (file name, collection index or None) pair.
    """

    def __init__(self, name, scripts, variant, weight, style, font):
        self.name = name
        self.scripts = scripts
        self.variant = variant
        self.weight = weight
        self.style = style
        self.font = font


def parse_fonts_xml(fonts_xml_path):
    """Parse fonts.xml, populating the module-level font tables.

    Sets _fallback_chain (list of FontRecord in fallback order) and
    _script_to_font_map (script code -> set of font tuples).
    """
    global _script_to_font_map, _fallback_chain
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chain = []
    tree = ElementTree.parse(fonts_xml_path)
    families = tree.findall('family')
    # Minikin supports up to 254 but users can place their own font at the
    # first place. Thus, 253 is the maximum allowed number of font families
    # in the default collection.
    assert len(families) < 254, (
        'System font collection can contain up to 253 font families.')
    for family in families:
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        if name:
            assert variant is None, (
                'No variant expected for LGC font %s.' % name)
            assert langs is None, (
                'No language expected for LGC fonts %s.' % name)
        else:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)

        if langs:
            langs = langs.split()
            scripts = {lang_to_script(lang) for lang in langs}
        else:
            scripts = set()

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)
            font_file = child.text.rstrip()
            weight = int(child.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            index = child.get('index')
            if index:
                index = int(index)

            _fallback_chain.append(FontRecord(
                name,
                frozenset(scripts),
                variant,
                weight,
                style,
                (font_file, index)))

            if name:  # non-empty names are used for default LGC fonts
                map_scripts = {'Latn', 'Grek', 'Cyrl'}
            else:
                map_scripts = scripts
            for script in map_scripts:
                _script_to_font_map[script].add((font_file, index))


def check_emoji_coverage(all_emoji, equivalent_emoji):
    """Check the system emoji font against the expected emoji set."""
    emoji_font = get_emoji_font()
    check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji)


def get_emoji_font():
    """Return the unique font declared with the 'Zsye' (emoji) script."""
    emoji_fonts = [
        record.font for record in _fallback_chain
        if 'Zsye' in record.scripts]
    assert len(emoji_fonts) == 1, 'There are %d emoji fonts.' % len(emoji_fonts)
    return emoji_fonts[0]


def check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji):
    """Verify the emoji font covers exactly all_emoji, no more, no less.

    Also verifies that sequences declared equivalent share a glyph, and
    that no two non-equivalent sequences collide on the same glyph.
    """
    coverage = get_emoji_map(emoji_font)
    for sequence in all_emoji:
        assert sequence in coverage, (
            '%s is not supported in the emoji font.' % printable(sequence))

    for sequence in coverage:
        if sequence in {0x0000, 0x000D, 0x0020}:
            # The font needs to support a few extra characters, which is OK
            continue
        assert sequence in all_emoji, (
            'Emoji font should not support %s.' % printable(sequence))

    for first, second in sorted(equivalent_emoji.items()):
        assert coverage[first] == coverage[second], (
            '%s and %s should map to the same glyph.' % (
                printable(first),
                printable(second)))

    for glyph in set(coverage.values()):
        maps_to_glyph = [seq for seq in coverage if coverage[seq] == glyph]
        if len(maps_to_glyph) > 1:
            # There are more than one sequences mapping to the same glyph. We
            # need to make sure they were expected to be equivalent.
            equivalent_seqs = set()
            for seq in maps_to_glyph:
                equivalent_seq = seq
                # Follow the equivalence chain to its canonical sequence.
                while equivalent_seq in equivalent_emoji:
                    equivalent_seq = equivalent_emoji[equivalent_seq]
                equivalent_seqs.add(equivalent_seq)
            assert len(equivalent_seqs) == 1, (
                'The sequences %s should not result in the same glyph %s' % (
                    printable(equivalent_seqs),
                    glyph))


def check_emoji_defaults(default_emoji):
    """Check that default-emoji characters are not shadowed by text fonts.

    Fonts placed before the emoji font in the fallback chain (and scripted
    fonts after it, other than 'Zsym') must not include characters that
    default to emoji presentation.  Also checks that a text-style glyph
    exists somewhere before the emoji font for every default-text emoji.
    """
    missing_text_chars = _emoji_properties['Emoji'] - default_emoji
    emoji_font_seen = False
    for record in _fallback_chain:
        if 'Zsye' in record.scripts:
            emoji_font_seen = True
            # No need to check the emoji font
            continue
        # For later fonts, we only check them if they have a script
        # defined, since the defined script may get them to a higher
        # score even if they appear after the emoji font. However,
        # we should skip checking the text symbols font, since
        # symbol fonts should be able to override the emoji display
        # style when 'Zsym' is explicitly specified by the user.
        if emoji_font_seen and (not record.scripts or 'Zsym' in record.scripts):
            continue

        # Check default emoji-style characters
        assert_font_supports_none_of_chars(record.font, sorted(default_emoji))

        # Mark default text-style characters appearing in fonts above the
        # emoji font as seen
        if not emoji_font_seen:
            missing_text_chars -= set(get_best_cmap(record.font))

    # Noto does not have monochrome glyphs for Unicode 7.0 wingdings and
    # webdings yet.
    missing_text_chars -= _chars_by_age['7.0']
    assert missing_text_chars == set(), (
        'Text style version of some emoji characters are missing: ' +
        repr(missing_text_chars))


# Setting reverse to true returns a dictionary that maps the values to sets of
# characters, useful for some binary properties. Otherwise, we get a
# dictionary that maps characters to the property values, assuming there's
# only one property in the file.
def parse_unicode_datafile(file_path, reverse=False):
    """Parse a UCD-style semicolon-separated data file.

    Handles single code points, code point ranges (X..Y), and space-
    separated sequences (which become tuples of code points).
    """
    if reverse:
        output_dict = collections.defaultdict(set)
    else:
        output_dict = {}
    with open(file_path) as datafile:
        for line in datafile:
            if '#' in line:
                line = line[:line.index('#')]
            line = line.strip()
            if not line:
                continue

            chars, prop = line.split(';')[:2]
            chars = chars.strip()
            prop = prop.strip()

            if ' ' in chars:  # character sequence
                sequence = [int(ch, 16) for ch in chars.split(' ')]
                additions = [tuple(sequence)]
            elif '..' in chars:  # character range
                char_start, char_end = chars.split('..')
                char_start = int(char_start, 16)
                char_end = int(char_end, 16)
                additions = range(char_start, char_end+1)
            else:  # single character
                additions = [int(chars, 16)]
            if reverse:
                output_dict[prop].update(additions)
            else:
                for addition in additions:
                    assert addition not in output_dict
                    output_dict[addition] = prop
    return output_dict


def parse_emoji_variants(file_path):
    """Parse emoji-variation-sequences.txt into (text set, emoji set).

    Each set contains (base, variation selector) pairs.
    """
    emoji_set = set()
    text_set = set()
    with open(file_path) as datafile:
        for line in datafile:
            if '#' in line:
                line = line[:line.index('#')]
            line = line.strip()
            if not line:
                continue
            sequence, description, _ = line.split(';')
            sequence = sequence.strip().split(' ')
            base = int(sequence[0], 16)
            vs = int(sequence[1], 16)
            description = description.strip()
            if description == 'text style':
                text_set.add((base, vs))
            elif description == 'emoji style':
                emoji_set.add((base, vs))
    return text_set, emoji_set


def parse_ucd(ucd_path):
    """Parse the UCD emoji data files, populating module-level tables."""
    global _emoji_properties, _chars_by_age
    global _text_variation_sequences, _emoji_variation_sequences
    global _emoji_sequences, _emoji_zwj_sequences
    _emoji_properties = parse_unicode_datafile(
        path.join(ucd_path, 'emoji-data.txt'), reverse=True)
    emoji_properties_additions = parse_unicode_datafile(
        path.join(ucd_path, 'additions', 'emoji-data.txt'), reverse=True)
    for prop in emoji_properties_additions.keys():
        _emoji_properties[prop].update(emoji_properties_additions[prop])

    _chars_by_age = parse_unicode_datafile(
        path.join(ucd_path, 'DerivedAge.txt'), reverse=True)
    sequences = parse_emoji_variants(
        path.join(ucd_path, 'emoji-variation-sequences.txt'))
    _text_variation_sequences, _emoji_variation_sequences = sequences
    _emoji_sequences = parse_unicode_datafile(
        path.join(ucd_path, 'emoji-sequences.txt'))
    _emoji_sequences.update(parse_unicode_datafile(
        path.join(ucd_path, 'additions', 'emoji-sequences.txt')))
    _emoji_zwj_sequences = parse_unicode_datafile(
        path.join(ucd_path, 'emoji-zwj-sequences.txt'))
    _emoji_zwj_sequences.update(parse_unicode_datafile(
        path.join(ucd_path, 'additions', 'emoji-zwj-sequences.txt')))


def flag_sequence(territory_code):
    """Return the regional-indicator sequence for a two-letter territory."""
    return tuple(0x1F1E6 + ord(ch) - ord('A') for ch in territory_code)


# Flags for which Android does not ship emoji.
UNSUPPORTED_FLAGS = frozenset({
    flag_sequence('BL'), flag_sequence('BQ'), flag_sequence('DG'),
    flag_sequence('EA'), flag_sequence('EH'), flag_sequence('FK'),
    flag_sequence('GF'), flag_sequence('GP'), flag_sequence('GS'),
    flag_sequence('MF'), flag_sequence('MQ'), flag_sequence('NC'),
    flag_sequence('PM'), flag_sequence('RE'), flag_sequence('TF'),
    flag_sequence('WF'), flag_sequence('XK'), flag_sequence('YT'),
})

# Flags that intentionally reuse another territory's flag glyph.
EQUIVALENT_FLAGS = {
    flag_sequence('BV'): flag_sequence('NO'),
    flag_sequence('CP'): flag_sequence('FR'),
    flag_sequence('HM'): flag_sequence('AU'),
    flag_sequence('SJ'): flag_sequence('NO'),
    flag_sequence('UM'): flag_sequence('US'),
}

COMBINING_KEYCAP = 0x20E3

# Android PUA code points historically used for emoji, mapped to the
# standard sequences they must render identically to.
LEGACY_ANDROID_EMOJI = {
    0xFE4E5: flag_sequence('JP'),
    0xFE4E6: flag_sequence('US'),
    0xFE4E7: flag_sequence('FR'),
    0xFE4E8: flag_sequence('DE'),
    0xFE4E9: flag_sequence('IT'),
    0xFE4EA: flag_sequence('GB'),
    0xFE4EB: flag_sequence('ES'),
    0xFE4EC: flag_sequence('RU'),
    0xFE4ED: flag_sequence('CN'),
    0xFE4EE: flag_sequence('KR'),
    0xFE82C: (ord('#'), COMBINING_KEYCAP),
    0xFE82E: (ord('1'), COMBINING_KEYCAP),
    0xFE82F: (ord('2'), COMBINING_KEYCAP),
    0xFE830: (ord('3'), COMBINING_KEYCAP),
    0xFE831: (ord('4'), COMBINING_KEYCAP),
    0xFE832: (ord('5'), COMBINING_KEYCAP),
    0xFE833: (ord('6'), COMBINING_KEYCAP),
    0xFE834: (ord('7'), COMBINING_KEYCAP),
    0xFE835: (ord('8'), COMBINING_KEYCAP),
    0xFE836: (ord('9'), COMBINING_KEYCAP),
    0xFE837: (ord('0'), COMBINING_KEYCAP),
}

# ZWJ sequences that must render identically to a single code point.
ZWJ_IDENTICALS = {
    # KISS
    (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468): 0x1F48F,
    # COUPLE WITH HEART
    (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F468): 0x1F491,
    # FAMILY
    (0x1F468, 0x200D, 0x1F469, 0x200D, 0x1F466): 0x1F46A,
}

ZWJ = 0x200D
FEMALE_SIGN = 0x2640
MALE_SIGN = 0x2642

# Gender-neutral emoji and the gendered variant they default to, so that
# e.g. <RUNNER, ZWJ, MALE SIGN> must render the same as RUNNER.
GENDER_DEFAULTS = [
    (0x26F9, MALE_SIGN),  # PERSON WITH BALL
    (0x1F3C3, MALE_SIGN),  # RUNNER
    (0x1F3C4, MALE_SIGN),  # SURFER
    (0x1F3CA, MALE_SIGN),  # SWIMMER
    (0x1F3CB, MALE_SIGN),  # WEIGHT LIFTER
    (0x1F3CC, MALE_SIGN),  # GOLFER
    (0x1F46E, MALE_SIGN),  # POLICE OFFICER
    (0x1F46F, FEMALE_SIGN),  # WOMAN WITH BUNNY EARS
    (0x1F471, MALE_SIGN),  # PERSON WITH BLOND HAIR
    (0x1F473, MALE_SIGN),  # MAN WITH TURBAN
    (0x1F477, MALE_SIGN),  # CONSTRUCTION WORKER
    (0x1F481, FEMALE_SIGN),  # INFORMATION DESK PERSON
    (0x1F482, MALE_SIGN),  # GUARDSMAN
    (0x1F486, FEMALE_SIGN),  # FACE MASSAGE
    (0x1F487, FEMALE_SIGN),  # HAIRCUT
    (0x1F575, MALE_SIGN),  # SLEUTH OR SPY
    (0x1F645, FEMALE_SIGN),  # FACE WITH NO GOOD GESTURE
    (0x1F646, FEMALE_SIGN),  # FACE WITH OK GESTURE
    (0x1F647, MALE_SIGN),  # PERSON BOWING DEEPLY
    (0x1F64B, FEMALE_SIGN),  # HAPPY PERSON RAISING ONE HAND
    (0x1F64D, FEMALE_SIGN),  # PERSON FROWNING
    (0x1F64E, FEMALE_SIGN),  # PERSON WITH POUTING FACE
    (0x1F6A3, MALE_SIGN),  # ROWBOAT
    (0x1F6B4, MALE_SIGN),  # BICYCLIST
    (0x1F6B5, MALE_SIGN),  # MOUNTAIN BICYCLIST
    (0x1F6B6, MALE_SIGN),  # PEDESTRIAN
    (0x1F926, FEMALE_SIGN),  # FACE PALM
    (0x1F937, FEMALE_SIGN),  # SHRUG
    (0x1F938, MALE_SIGN),  # PERSON DOING CARTWHEEL
    (0x1F939, MALE_SIGN),  # JUGGLING
    (0x1F93C, MALE_SIGN),  # WRESTLERS
    (0x1F93D, MALE_SIGN),  # WATER POLO
    (0x1F93E, MALE_SIGN),  # HANDBALL
    (0x1F9D6, FEMALE_SIGN),  # PERSON IN STEAMY ROOM
    (0x1F9D7, FEMALE_SIGN),  # PERSON CLIMBING
    (0x1F9D8, FEMALE_SIGN),  # PERSON IN LOTUS POSITION
    (0x1F9D9, FEMALE_SIGN),  # MAGE
    (0x1F9DA, FEMALE_SIGN),  # FAIRY
    (0x1F9DB, FEMALE_SIGN),  # VAMPIRE
    (0x1F9DC, FEMALE_SIGN),  # MERPERSON
    (0x1F9DD, FEMALE_SIGN),  # ELF
    (0x1F9DE, FEMALE_SIGN),  # GENIE
    (0x1F9DF, FEMALE_SIGN),  # ZOMBIE
]


def is_fitzpatrick_modifier(cp):
    """Return True if cp is an emoji skin tone modifier (U+1F3FB..U+1F3FF)."""
    return 0x1F3FB <= cp <= 0x1F3FF


def reverse_emoji(seq):
    """Reverse an emoji sequence for RTL rendering.

    If there are Fitzpatrick modifiers in the sequence, keep them after
    the emoji they modify.
    """
    rev = list(reversed(seq))
    for i in range(1, len(rev)):
        if is_fitzpatrick_modifier(rev[i-1]):
            rev[i], rev[i-1] = rev[i-1], rev[i]
    return tuple(rev)


def compute_expected_emoji():
    """Derive the expected emoji coverage from the parsed UCD data.

    Returns (all_emoji, default_emoji, equivalent_emoji):
      * all_emoji: every code point and sequence the emoji font must cover,
      * default_emoji: those that must default to emoji presentation,
      * equivalent_emoji: sequences that must share a glyph with their value.
    """
    equivalent_emoji = {}
    sequence_pieces = set()
    all_sequences = set()
    all_sequences.update(_emoji_variation_sequences)

    # NOTE(review): the original also re-updated a copy of
    # _emoji_zwj_sequences with itself, which was a no-op; the copy is kept
    # as the place to add zwj sequences not yet in emoji-zwj-sequences.txt.
    adjusted_emoji_zwj_sequences = dict(_emoji_zwj_sequences)

    # Add empty flag tag sequence that is supported as fallback
    _emoji_sequences[(0x1F3F4, 0xE007F)] = 'Emoji_Tag_Sequence'

    for sequence in _emoji_sequences.keys():
        sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        if _emoji_sequences.get(sequence, None) == 'Emoji_Tag_Sequence':
            # Add reverse of all emoji ZWJ sequences, which are added to the
            # fonts as a workaround to get the sequences work in RTL text.
            # TODO: test if these are actually needed by Minikin/HarfBuzz.
            reversed_seq = reverse_emoji(sequence)
            all_sequences.add(reversed_seq)
            equivalent_emoji[reversed_seq] = sequence

    for sequence in adjusted_emoji_zwj_sequences.keys():
        sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Add reverse of all emoji ZWJ sequences, which are added to the fonts
        # as a workaround to get the sequences work in RTL text.
        reversed_seq = reverse_emoji(sequence)
        all_sequences.add(reversed_seq)
        equivalent_emoji[reversed_seq] = sequence

    # Remove unsupported flags
    all_sequences.difference_update(UNSUPPORTED_FLAGS)

    # Add all tag characters used in flags
    sequence_pieces.update(range(0xE0030, 0xE0039 + 1))
    sequence_pieces.update(range(0xE0061, 0xE007A + 1))

    all_emoji = (
        _emoji_properties['Emoji'] |
        all_sequences |
        sequence_pieces |
        set(LEGACY_ANDROID_EMOJI.keys()))
    default_emoji = (
        _emoji_properties['Emoji_Presentation'] |
        all_sequences |
        set(LEGACY_ANDROID_EMOJI.keys()))

    equivalent_emoji.update(EQUIVALENT_FLAGS)
    equivalent_emoji.update(LEGACY_ANDROID_EMOJI)
    equivalent_emoji.update(ZWJ_IDENTICALS)

    for ch, gender in GENDER_DEFAULTS:
        equivalent_emoji[(ch, ZWJ, gender)] = ch
        for skin_tone in range(0x1F3FB, 0x1F3FF+1):
            skin_toned = (ch, skin_tone, ZWJ, gender)
            if skin_toned in all_emoji:
                equivalent_emoji[skin_toned] = (ch, skin_tone)

    for seq in _emoji_variation_sequences:
        equivalent_emoji[seq] = seq[0]

    return all_emoji, default_emoji, equivalent_emoji


def check_vertical_metrics():
    """Check that the default LGC fonts have the expected vertical metrics."""
    for record in _fallback_chain:
        if record.name in ['sans-serif', 'sans-serif-condensed']:
            font = open_font(record.font)
            assert font['head'].yMax == 2163 and font['head'].yMin == -555, (
                'yMax and yMin of %s do not match expected values.'
                % (record.font,))

        if record.name in [
                'sans-serif', 'sans-serif-condensed', 'serif', 'monospace']:
            font = open_font(record.font)
            assert font['hhea'].ascent == 1900 and font['hhea'].descent == -500, (
                'ascent and descent of %s do not match expected values.'
                % (record.font,))


def main():
    """Entry point: argv is TARGET_OUT CHECK_EMOJI [UCD_PATH]."""
    global _fonts_dir
    target_out = sys.argv[1]
    _fonts_dir = path.join(target_out, 'fonts')

    fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')
    parse_fonts_xml(fonts_xml_path)

    check_vertical_metrics()

    hyphens_dir = path.join(target_out, 'usr', 'hyphen-data')
    check_hyphens(hyphens_dir)

    check_emoji = sys.argv[2]
    if check_emoji == 'true':
        ucd_path = sys.argv[3]
        parse_ucd(ucd_path)
        all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
        check_emoji_coverage(all_emoji, equivalent_emoji)
        check_emoji_defaults(default_emoji)


if __name__ == '__main__':
    main()