#!/usr/bin/python
# coding=UTF-8
#
# Copyright 2014 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17
"""Create a curated subset of NotoSansSymbols for Android."""

__author__ = 'roozbeh@google.com (Roozbeh Pournader)'
21
import os
import sys

from nototools import subset
from nototools import unicode_data
27
# Unicode blocks that we want to include in the font. Each line has the form
# "FIRST..LAST; Block name", where FIRST and LAST are hexadecimal code points
# and the range is inclusive.
BLOCKS_TO_INCLUDE = """
20D0..20FF; Combining Diacritical Marks for Symbols
2100..214F; Letterlike Symbols
2190..21FF; Arrows
2200..22FF; Mathematical Operators
2300..23FF; Miscellaneous Technical
2400..243F; Control Pictures
2440..245F; Optical Character Recognition
2460..24FF; Enclosed Alphanumerics
2500..257F; Box Drawing
2580..259F; Block Elements
25A0..25FF; Geometric Shapes
2600..26FF; Miscellaneous Symbols
2700..27BF; Dingbats
27C0..27EF; Miscellaneous Mathematical Symbols-A
27F0..27FF; Supplemental Arrows-A
2800..28FF; Braille Patterns
2900..297F; Supplemental Arrows-B
2980..29FF; Miscellaneous Mathematical Symbols-B
2A00..2AFF; Supplemental Mathematical Operators
2B00..2BFF; Miscellaneous Symbols and Arrows
4DC0..4DFF; Yijing Hexagram Symbols
10140..1018F; Ancient Greek Numbers
10190..101CF; Ancient Symbols
101D0..101FF; Phaistos Disc
1D000..1D0FF; Byzantine Musical Symbols
1D100..1D1FF; Musical Symbols
1D200..1D24F; Ancient Greek Musical Notation
1D300..1D35F; Tai Xuan Jing Symbols
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1F000..1F02F; Mahjong Tiles
1F030..1F09F; Domino Tiles
1F0A0..1F0FF; Playing Cards
1F700..1F77F; Alchemical Symbols
"""
65
# One-off characters to be included. At the moment, this is the Bitcoin sign
# (since it's not supported in Roboto yet) and the Japanese TV symbols of
# Unicode 9.
ONE_OFF_ADDITIONS = {
    0x20BF, # ₿ BITCOIN SIGN
    0x1F19B, # 🆛 SQUARED THREE D
    0x1F19C, # 🆜 SQUARED SECOND SCREEN
    0x1F19D, # 🆝 SQUARED TWO K
    0x1F19E, # 🆞 SQUARED FOUR K
    0x1F19F, # 🆟 SQUARED EIGHT K
    0x1F1A0, # 🆠 SQUARED FIVE POINT ONE
    0x1F1A1, # 🆡 SQUARED SEVEN POINT ONE
    0x1F1A2, # 🆢 SQUARED TWENTY-TWO POINT TWO
    0x1F1A3, # 🆣 SQUARED SIXTY P
    0x1F1A4, # 🆤 SQUARED ONE HUNDRED TWENTY P
    0x1F1A5, # 🆥 SQUARED LATIN SMALL LETTER D
    0x1F1A6, # 🆦 SQUARED HC
    0x1F1A7, # 🆧 SQUARED HDR
    0x1F1A8, # 🆨 SQUARED HI-RES
    0x1F1A9, # 🆩 SQUARED LOSSLESS
    0x1F1AA, # 🆪 SQUARED SHV
    0x1F1AB, # 🆫 SQUARED UHD
    0x1F1AC, # 🆬 SQUARED VOD
    0x1F23B, # 🈻 SQUARED CJK UNIFIED IDEOGRAPH-914D
}
91
# Letter-based characters, provided by Roboto. These are taken out of the
# subset so that they come from Roboto instead.
LETTERLIKE_CHARS_IN_ROBOTO = {
    0x2100, # ℀ ACCOUNT OF
    0x2101, # ℁ ADDRESSED TO THE SUBJECT
    0x2103, # ℃ DEGREE CELSIUS
    0x2105, # ℅ CARE OF
    0x2106, # ℆ CADA UNA
    0x2109, # ℉ DEGREE FAHRENHEIT
    0x2113, # ℓ SCRIPT SMALL L
    0x2116, # № NUMERO SIGN
    0x2117, # ℗ SOUND RECORDING COPYRIGHT
    0x211E, # ℞ PRESCRIPTION TAKE
    0x211F, # ℟ RESPONSE
    0x2120, # ℠ SERVICE MARK
    0x2121, # ℡ TELEPHONE SIGN
    0x2122, # ™ TRADE MARK SIGN
    0x2123, # ℣ VERSICLE
    0x2125, # ℥ OUNCE SIGN
    0x2126, # Ω OHM SIGN
    0x212A, # K KELVIN SIGN
    0x212B, # Å ANGSTROM SIGN
    0x212E, # ℮ ESTIMATED SYMBOL
    0x2132, # Ⅎ TURNED CAPITAL F
    0x213B, # ℻ FACSIMILE SIGN
    0x214D, # ⅍ AKTIESELSKAB
    0x214F, # ⅏ SYMBOL FOR SAMARITAN SOURCE
}
119
# Characters that default to emoji presentation, as reported by
# nototools.unicode_data. Evaluated once, at import time.
DEFAULT_EMOJI = unicode_data.get_presentation_default_emoji()

# Data file listing the extra emoji-presentation characters: it lives at
# unicode/additions/emoji-data.txt, two directory levels above this script.
EMOJI_ADDITIONS_FILE = os.path.join(
    os.path.dirname(__file__), os.path.pardir, os.path.pardir,
    'unicode', 'additions', 'emoji-data.txt')
125
126
# Characters we have decided we are doing as emoji-style in Android,
# despite UTR#51's recommendation
def get_android_emoji():
    """Return additional Android default emojis.

    Reads EMOJI_ADDITIONS_FILE, parses it as semicolon-separated
    (code point, property) records, and keeps the records whose property is
    'Emoji_Presentation'.

    Returns:
        A set of integer code points.
    """
    with open(EMOJI_ADDITIONS_FILE) as emoji_additions:
        contents = emoji_additions.read()
    records = unicode_data._parse_semicolon_separated_data(contents)
    # Code points are written in hexadecimal in the data file.
    return {
        int(codepoint, 16)
        for codepoint, prop in records
        if prop == 'Emoji_Presentation'
    }
139
140
def main(argv):
    """Subset the Noto Symbols font.

    Two fonts are written from the same source font: the curated symbols
    subset, and a second subset that holds only the characters defaulting to
    emoji presentation (the ones removed from the first subset).

    Args:
        argv: Command-line argument list. argv[1] is the source file name,
            argv[2] is the target file name for the symbols subset, and
            argv[3] is the target file name for the emoji subset. Any
            further items are ignored.

    Raises:
        SystemExit: If fewer than three file names are given.
    """
    # Validate the arguments before doing any work: the third file name was
    # previously read only after the first font had already been written.
    if len(argv) < 4:
        program = argv[0] if argv else '<script>'
        sys.exit(
            'Usage: %s <source font> <symbols subset> <emoji subset>'
            % program)
    source_file_name, target_file_name, second_subset_file_name = argv[1:4]

    target_coverage = set()
    # Add all characters in BLOCKS_TO_INCLUDE (ranges are inclusive)
    for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
        target_coverage.update(range(first, last + 1))

    # Add one-off characters
    target_coverage |= ONE_OFF_ADDITIONS
    # Remove characters preferably coming from Roboto
    target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO
    # Remove characters that are supposed to default to emoji
    emoji_coverage = DEFAULT_EMOJI | get_android_emoji()
    target_coverage -= emoji_coverage

    # Remove dentistry symbols, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    target_coverage -= set(range(0x23BE, 0x23CC + 1))

    # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color emoji
    # mechanism to work properly. discard() rather than remove(): the
    # character may already have been taken out with the emoji above, and
    # that must not abort the run with a KeyError.
    target_coverage.discard(0x20E3)

    subset.subset_font(
        source_file_name,
        target_file_name,
        include=target_coverage)

    # The second font carries exactly the emoji-default characters.
    subset.subset_font(
        source_file_name,
        second_subset_file_name,
        include=emoji_coverage)
182
183
# Run only when executed as a script; the full argument vector (including the
# program name at index 0) is handed to main().
if __name__ == '__main__':
    main(sys.argv)
186