_c_m_a_p.py revision 1a4f96b7871a0cf9b83e89c5f70854ddb0f41a5e
1f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)from __future__ import print_function, division 2f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)from fontTools.misc.py23 import * 3f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)from fontTools.misc.textTools import safeEval, readHex 4f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)from . import DefaultTable 5f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)import sys 6f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)import struct 7f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)import array 8f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)import operator 9f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 10f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 11f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)class table__c_m_a_p(DefaultTable.DefaultTable): 12f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 13f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) def getcmap(self, platformID, platEncID): 14f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) for subtable in self.tables: 15f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (subtable.platformID == platformID and 16f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) subtable.platEncID == platEncID): 17f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return subtable 18f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return None # not found 19f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 20f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) def decompile(self, data, ttFont): 21f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) tableVersion, numSubTables = struct.unpack(">HH", data[:4]) 22f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) self.tableVersion = int(tableVersion) 23116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch self.tables = tables = [] 24f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) seenOffsets = {} 25f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) for i in range(numSubTables): 26f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) platformID, platEncID, offset = struct.unpack( 27f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) ">HHl", data[4+i*8:4+(i+1)*8]) 28f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) platformID, platEncID = int(platformID), int(platEncID) 29f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) format, length = struct.unpack(">HH", data[offset:offset+4]) 30f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if format in [8,10,12,13]: 31f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) format, reserved, length = struct.unpack(">HHL", data[offset:offset+8]) 32f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) elif format in [14]: 33f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) format, length = struct.unpack(">HL", data[offset:offset+6]) 34f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 35f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if not length: 36f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s, format %s offset %s. Skipping table." % (platformID, platEncID,format, offset)) 37f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) continue 38f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if format not in cmap_classes: 39f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) table = cmap_format_unknown(format) 40f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) else: 41f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) table = cmap_classes[format](format) 42f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) table.platformID = platformID 43f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) table.platEncID = platEncID 44f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) # Note that by default we decompile only the subtable header info; 45f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) # any other data gets decompiled only when an attribute of the 46f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) # subtable is referenced. 47f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) table.decompileHeader(data[offset:offset+int(length)], ttFont) 48f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if offset in seenOffsets: 49f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) table.cmap = tables[seenOffsets[offset]].cmap 50f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) else: 51f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) seenOffsets[offset] = i 52f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) tables.append(table) 53f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 54f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) def compile(self, ttFont): 55f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) self.tables.sort() # sort according to the spec; see CmapSubtable.__lt__() 56f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) numSubTables = len(self.tables) 57f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) totalOffset = 4 + 8 * numSubTables 58f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) data = struct.pack(">HH", self.tableVersion, numSubTables) 59f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) tableData = b"" 60f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) seen = {} # Some tables are the same object reference. Don't compile them twice. 61f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) done = {} # Some tables are different objects, but compile to the same data chunk 62f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) for table in self.tables: 63f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) try: 64f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) offset = seen[id(table.cmap)] 65f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) except KeyError: 66f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) chunk = table.compile(ttFont) 67f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if chunk in done: 68f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) offset = done[chunk] 69f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) else: 70f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData) 71f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) tableData = tableData + chunk 72f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset) 73f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return data + tableData 74f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 75f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) def toXML(self, writer, ttFont): 76f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) writer.simpletag("tableVersion", version=self.tableVersion) 77f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) writer.newline() 78116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch for table in self.tables: 79116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch table.toXML(writer, ttFont) 80f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 81f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) def fromXML(self, name, attrs, content, ttFont): 82f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if name == "tableVersion": 83f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) self.tableVersion = safeEval(attrs["version"]) 84f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return 85f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if name[:12] != "cmap_format_": 86f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return 87 if not hasattr(self, "tables"): 88 self.tables = [] 89 format = safeEval(name[12:]) 90 if format not in cmap_classes: 91 table = cmap_format_unknown(format) 92 else: 93 table = cmap_classes[format](format) 94 table.platformID = safeEval(attrs["platformID"]) 95 table.platEncID = safeEval(attrs["platEncID"]) 96 table.fromXML(name, attrs, content, ttFont) 97 self.tables.append(table) 98 99 100class CmapSubtable(object): 101 102 def __init__(self, format): 103 self.format = format 104 self.data = None 105 self.ttFont = None 106 107 def __getattr__(self, attr): 108 # allow lazy decompilation of subtables. 109 if attr[:2] == '__': # don't handle requests for member functions like '__lt__' 110 raise AttributeError(attr) 111 if self.data is None: 112 raise AttributeError(attr) 113 self.decompile(None, None) # use saved data. 114 self.data = None # Once this table has been decompiled, make sure we don't 115 # just return the original data. Also avoids recursion when 116 # called with an attribute that the cmap subtable doesn't have. 117 return getattr(self, attr) 118 119 def decompileHeader(self, data, ttFont): 120 format, length, language = struct.unpack(">HHH", data[:6]) 121 assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length) 122 self.format = int(format) 123 self.length = int(length) 124 self.language = int(language) 125 self.data = data[6:] 126 self.ttFont = ttFont 127 128 def toXML(self, writer, ttFont): 129 writer.begintag(self.__class__.__name__, [ 130 ("platformID", self.platformID), 131 ("platEncID", self.platEncID), 132 ("language", self.language), 133 ]) 134 writer.newline() 135 codes = sorted(self.cmap.items()) 136 self._writeCodes(codes, writer) 137 writer.endtag(self.__class__.__name__) 138 writer.newline() 139 140 def _writeCodes(self, codes, writer): 141 if (self.platformID, self.platEncID) == (3, 1) or (self.platformID, self.platEncID) == (3, 10) or self.platformID == 0: 142 from fontTools.unicode import Unicode 143 isUnicode = 1 144 else: 145 isUnicode = 0 146 for code, name in codes: 147 writer.simpletag("map", code=hex(code), name=name) 148 if isUnicode: 149 writer.comment(Unicode[code]) 150 writer.newline() 151 152 def __lt__(self, other): 153 if not isinstance(other, CmapSubtable): 154 return NotImplemented 155 156 # implemented so that list.sort() sorts according to the spec. 157 selfTuple = ( 158 getattr(self, "platformID", None), 159 getattr(self, "platEncID", None), 160 getattr(self, "language", None), 161 self.__dict__) 162 otherTuple = ( 163 getattr(other, "platformID", None), 164 getattr(other, "platEncID", None), 165 getattr(other, "language", None), 166 other.__dict__) 167 return selfTuple < otherTuple 168 169 170class cmap_format_0(CmapSubtable): 171 172 def decompile(self, data, ttFont): 173 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 174 # If not, someone is calling the subtable decompile() directly, and must provide both args. 175 if data is not None and ttFont is not None: 176 self.decompileHeader(data[offset:offset+int(length)], ttFont) 177 else: 178 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 179 data = self.data # decompileHeader assigns the data after the header to self.data 180 assert 262 == self.length, "Format 0 cmap subtable not 262 bytes" 181 glyphIdArray = array.array("B") 182 glyphIdArray.fromstring(self.data) 183 self.cmap = cmap = {} 184 lenArray = len(glyphIdArray) 185 charCodes = list(range(lenArray)) 186 names = map(self.ttFont.getGlyphName, glyphIdArray) 187 list(map(operator.setitem, [cmap]*lenArray, charCodes, names)) 188 189 190 def compile(self, ttFont): 191 if self.data: 192 return struct.pack(">HHH", 0, 262, self.language) + self.data 193 194 charCodeList = sorted(self.cmap.items()) 195 charCodes = [entry[0] for entry in charCodeList] 196 valueList = [entry[1] for entry in charCodeList] 197 assert charCodes == list(range(256)) 198 valueList = map(ttFont.getGlyphID, valueList) 199 200 glyphIdArray = array.array("B", valueList) 201 data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring() 202 assert len(data) == 262 203 return data 204 205 def fromXML(self, name, attrs, content, ttFont): 206 self.language = safeEval(attrs["language"]) 207 if not hasattr(self, "cmap"): 208 self.cmap = {} 209 cmap = self.cmap 210 for element in content: 211 if not isinstance(element, tuple): 212 continue 213 name, attrs, content = element 214 if name != "map": 215 continue 216 cmap[safeEval(attrs["code"])] = attrs["name"] 217 218 219subHeaderFormat = ">HHhH" 220class SubHeader(object): 221 def __init__(self): 222 self.firstCode = None 223 self.entryCount = None 224 self.idDelta = None 225 self.idRangeOffset = None 226 self.glyphIndexArray = [] 227 228class cmap_format_2(CmapSubtable): 229 230 def setIDDelta(self, subHeader): 231 subHeader.idDelta = 0 232 # find the minGI which is not zero. 233 minGI = subHeader.glyphIndexArray[0] 234 for gid in subHeader.glyphIndexArray: 235 if (gid != 0) and (gid < minGI): 236 minGI = gid 237 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 238 # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K. 239 # We would like to pick an idDelta such that the first glyphArray GID is 1, 240 # so that we are more likely to be able to combine glypharray GID subranges. 241 # This means that we have a problem when minGI is > 32K 242 # Since the final gi is reconstructed from the glyphArray GID by: 243 # (short)finalGID = (gid + idDelta) % 0x10000), 244 # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the 245 # negative number to an unsigned short. 246 247 if (minGI > 1): 248 if minGI > 0x7FFF: 249 subHeader.idDelta = -(0x10000 - minGI) -1 250 else: 251 subHeader.idDelta = minGI -1 252 idDelta = subHeader.idDelta 253 for i in range(subHeader.entryCount): 254 gid = subHeader.glyphIndexArray[i] 255 if gid > 0: 256 subHeader.glyphIndexArray[i] = gid - idDelta 257 258 259 def decompile(self, data, ttFont): 260 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 261 # If not, someone is calling the subtable decompile() directly, and must provide both args. 262 if data is not None and ttFont is not None: 263 self.decompileHeader(data[offset:offset+int(length)], ttFont) 264 else: 265 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 266 267 data = self.data # decompileHeader assigns the data after the header to self.data 268 subHeaderKeys = [] 269 maxSubHeaderindex = 0 270 # get the key array, and determine the number of subHeaders. 271 allKeys = array.array("H") 272 allKeys.fromstring(data[:512]) 273 data = data[512:] 274 if sys.byteorder != "big": 275 allKeys.byteswap() 276 subHeaderKeys = [ key//8 for key in allKeys] 277 maxSubHeaderindex = max(subHeaderKeys) 278 279 #Load subHeaders 280 subHeaderList = [] 281 pos = 0 282 for i in range(maxSubHeaderindex + 1): 283 subHeader = SubHeader() 284 (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \ 285 subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8]) 286 pos += 8 287 giDataPos = pos + subHeader.idRangeOffset-2 288 giList = array.array("H") 289 giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount*2]) 290 if sys.byteorder != "big": 291 giList.byteswap() 292 subHeader.glyphIndexArray = giList 293 subHeaderList.append(subHeader) 294 # How this gets processed. 295 # Charcodes may be one or two bytes. 296 # The first byte of a charcode is mapped through the subHeaderKeys, to select 297 # a subHeader. For any subheader but 0, the next byte is then mapped through the 298 # selected subheader. If subheader Index 0 is selected, then the byte itself is 299 # mapped through the subheader, and there is no second byte. 300 # Then assume that the subsequent byte is the first byte of the next charcode,and repeat. 301 # 302 # Each subheader references a range in the glyphIndexArray whose length is entryCount. 303 # The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray 304 # referenced by another subheader. 305 # The only subheader that will be referenced by more than one first-byte value is the subheader 306 # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef: 307 # {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx} 308 # A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex. 309 # A subheader specifies a subrange within (0...256) by the 310 # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero 311 # (e.g. glyph not in font). 312 # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar). 313 # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by 314 # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the 315 # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex. 316 # Example for Logocut-Medium 317 # first byte of charcode = 129; selects subheader 1. 318 # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252} 319 # second byte of charCode = 66 320 # the index offset = 66-64 = 2. 321 # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is: 322 # [glyphIndexArray index], [subrange array index] = glyphIndex 323 # [256], [0]=1 from charcode [129, 64] 324 # [257], [1]=2 from charcode [129, 65] 325 # [258], [2]=3 from charcode [129, 66] 326 # [259], [3]=4 from charcode [129, 67] 327 # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero, 328 # add it to the glyphID to get the final glyphIndex 329 # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew! 330 331 self.data = b"" 332 self.cmap = cmap = {} 333 notdefGI = 0 334 for firstByte in range(256): 335 subHeadindex = subHeaderKeys[firstByte] 336 subHeader = subHeaderList[subHeadindex] 337 if subHeadindex == 0: 338 if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount): 339 continue # gi is notdef. 340 else: 341 charCode = firstByte 342 offsetIndex = firstByte - subHeader.firstCode 343 gi = subHeader.glyphIndexArray[offsetIndex] 344 if gi != 0: 345 gi = (gi + subHeader.idDelta) % 0x10000 346 else: 347 continue # gi is notdef. 348 cmap[charCode] = gi 349 else: 350 if subHeader.entryCount: 351 charCodeOffset = firstByte * 256 + subHeader.firstCode 352 for offsetIndex in range(subHeader.entryCount): 353 charCode = charCodeOffset + offsetIndex 354 gi = subHeader.glyphIndexArray[offsetIndex] 355 if gi != 0: 356 gi = (gi + subHeader.idDelta) % 0x10000 357 else: 358 continue 359 cmap[charCode] = gi 360 # If not subHeader.entryCount, then all char codes with this first byte are 361 # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the 362 # same as mapping it to .notdef. 363 # cmap values are GID's. 364 glyphOrder = self.ttFont.getGlyphOrder() 365 gids = list(cmap.values()) 366 charCodes = list(cmap.keys()) 367 lenCmap = len(gids) 368 try: 369 names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids )) 370 except IndexError: 371 getGlyphName = self.ttFont.getGlyphName 372 names = list(map(getGlyphName, gids )) 373 list(map(operator.setitem, [cmap]*lenCmap, charCodes, names)) 374 375 376 def compile(self, ttFont): 377 if self.data: 378 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 379 kEmptyTwoCharCodeRange = -1 380 notdefGI = 0 381 382 items = sorted(self.cmap.items()) 383 charCodes = [item[0] for item in items] 384 names = [item[1] for item in items] 385 nameMap = ttFont.getReverseGlyphMap() 386 lenCharCodes = len(charCodes) 387 try: 388 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 389 except KeyError: 390 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 391 try: 392 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 393 except KeyError: 394 # allow virtual GIDs in format 2 tables 395 gids = [] 396 for name in names: 397 try: 398 gid = nameMap[name] 399 except KeyError: 400 try: 401 if (name[:3] == 'gid'): 402 gid = eval(name[3:]) 403 else: 404 gid = ttFont.getGlyphID(name) 405 except: 406 raise KeyError(name) 407 408 gids.append(gid) 409 410 # Process the (char code to gid) item list in char code order. 411 # By definition, all one byte char codes map to subheader 0. 412 # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0, 413 # which defines all char codes in its range to map to notdef) unless proven otherwise. 414 # Note that since the char code items are processed in char code order, all the char codes with the 415 # same first byte are in sequential order. 416 417 subHeaderKeys = [ kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList. 418 subHeaderList = [] 419 420 # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up 421 # with a cmap where all the one byte char codes map to notdef, 422 # with the result that the subhead 0 would not get created just by processing the item list. 423 charCode = charCodes[0] 424 if charCode > 255: 425 subHeader = SubHeader() 426 subHeader.firstCode = 0 427 subHeader.entryCount = 0 428 subHeader.idDelta = 0 429 subHeader.idRangeOffset = 0 430 subHeaderList.append(subHeader) 431 432 433 lastFirstByte = -1 434 items = zip(charCodes, gids) 435 for charCode, gid in items: 436 if gid == 0: 437 continue 438 firstbyte = charCode >> 8 439 secondByte = charCode & 0x00FF 440 441 if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one. 442 if lastFirstByte > -1: 443 # fix GI's and iDelta of current subheader. 444 self.setIDDelta(subHeader) 445 446 # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero 447 # for the indices matching the char codes. 448 if lastFirstByte == 0: 449 for index in range(subHeader.entryCount): 450 charCode = subHeader.firstCode + index 451 subHeaderKeys[charCode] = 0 452 453 assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange." 454 # init new subheader 455 subHeader = SubHeader() 456 subHeader.firstCode = secondByte 457 subHeader.entryCount = 1 458 subHeader.glyphIndexArray.append(gid) 459 subHeaderList.append(subHeader) 460 subHeaderKeys[firstbyte] = len(subHeaderList) -1 461 lastFirstByte = firstbyte 462 else: 463 # need to fill in with notdefs all the code points between the last charCode and the current charCode. 464 codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount) 465 for i in range(codeDiff): 466 subHeader.glyphIndexArray.append(notdefGI) 467 subHeader.glyphIndexArray.append(gid) 468 subHeader.entryCount = subHeader.entryCount + codeDiff + 1 469 470 # fix GI's and iDelta of last subheader that we we added to the subheader array. 471 self.setIDDelta(subHeader) 472 473 # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges. 474 subHeader = SubHeader() 475 subHeader.firstCode = 0 476 subHeader.entryCount = 0 477 subHeader.idDelta = 0 478 subHeader.idRangeOffset = 2 479 subHeaderList.append(subHeader) 480 emptySubheadIndex = len(subHeaderList) - 1 481 for index in range(256): 482 if subHeaderKeys[index] == kEmptyTwoCharCodeRange: 483 subHeaderKeys[index] = emptySubheadIndex 484 # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the 485 # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray, 486 # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with 487 # charcode 0 and GID 0. 488 489 idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset. 490 subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2. 491 for index in range(subheadRangeLen): 492 subHeader = subHeaderList[index] 493 subHeader.idRangeOffset = 0 494 for j in range(index): 495 prevSubhead = subHeaderList[j] 496 if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray 497 subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8 498 subHeader.glyphIndexArray = [] 499 break 500 if subHeader.idRangeOffset == 0: # didn't find one. 501 subHeader.idRangeOffset = idRangeOffset 502 idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray. 503 else: 504 idRangeOffset = idRangeOffset - 8 # one less subheader 505 506 # Now we can write out the data! 507 length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array. 508 for subhead in subHeaderList[:-1]: 509 length = length + len(subhead.glyphIndexArray)*2 # We can't use subhead.entryCount, as some of the subhead may share subArrays. 510 dataList = [struct.pack(">HHH", 2, length, self.language)] 511 for index in subHeaderKeys: 512 dataList.append(struct.pack(">H", index*8)) 513 for subhead in subHeaderList: 514 dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)) 515 for subhead in subHeaderList[:-1]: 516 for gi in subhead.glyphIndexArray: 517 dataList.append(struct.pack(">H", gi)) 518 data = bytesjoin(dataList) 519 assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length) 520 return data 521 522 523 def fromXML(self, name, attrs, content, ttFont): 524 self.language = safeEval(attrs["language"]) 525 if not hasattr(self, "cmap"): 526 self.cmap = {} 527 cmap = self.cmap 528 529 for element in content: 530 if not isinstance(element, tuple): 531 continue 532 name, attrs, content = element 533 if name != "map": 534 continue 535 cmap[safeEval(attrs["code"])] = attrs["name"] 536 537 538cmap_format_4_format = ">7H" 539 540#uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF. 541#uint16 reservedPad # This value should be zero 542#uint16 startCode[segCount] # Starting character code for each segment 543#uint16 idDelta[segCount] # Delta for all character codes in segment 544#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0 545#uint16 glyphIndexArray[variable] # Glyph index array 546 547def splitRange(startCode, endCode, cmap): 548 # Try to split a range of character codes into subranges with consecutive 549 # glyph IDs in such a way that the cmap4 subtable can be stored "most" 550 # efficiently. I can't prove I've got the optimal solution, but it seems 551 # to do well with the fonts I tested: none became bigger, many became smaller. 552 if startCode == endCode: 553 return [], [endCode] 554 555 lastID = cmap[startCode] 556 lastCode = startCode 557 inOrder = None 558 orderedBegin = None 559 subRanges = [] 560 561 # Gather subranges in which the glyph IDs are consecutive. 562 for code in range(startCode + 1, endCode + 1): 563 glyphID = cmap[code] 564 565 if glyphID - 1 == lastID: 566 if inOrder is None or not inOrder: 567 inOrder = 1 568 orderedBegin = lastCode 569 else: 570 if inOrder: 571 inOrder = 0 572 subRanges.append((orderedBegin, lastCode)) 573 orderedBegin = None 574 575 lastID = glyphID 576 lastCode = code 577 578 if inOrder: 579 subRanges.append((orderedBegin, lastCode)) 580 assert lastCode == endCode 581 582 # Now filter out those new subranges that would only make the data bigger. 583 # A new segment cost 8 bytes, not using a new segment costs 2 bytes per 584 # character. 585 newRanges = [] 586 for b, e in subRanges: 587 if b == startCode and e == endCode: 588 break # the whole range, we're fine 589 if b == startCode or e == endCode: 590 threshold = 4 # split costs one more segment 591 else: 592 threshold = 8 # split costs two more segments 593 if (e - b + 1) > threshold: 594 newRanges.append((b, e)) 595 subRanges = newRanges 596 597 if not subRanges: 598 return [], [endCode] 599 600 if subRanges[0][0] != startCode: 601 subRanges.insert(0, (startCode, subRanges[0][0] - 1)) 602 if subRanges[-1][1] != endCode: 603 subRanges.append((subRanges[-1][1] + 1, endCode)) 604 605 # Fill the "holes" in the segments list -- those are the segments in which 606 # the glyph IDs are _not_ consecutive. 607 i = 1 608 while i < len(subRanges): 609 if subRanges[i-1][1] + 1 != subRanges[i][0]: 610 subRanges.insert(i, (subRanges[i-1][1] + 1, subRanges[i][0] - 1)) 611 i = i + 1 612 i = i + 1 613 614 # Transform the ranges into startCode/endCode lists. 615 start = [] 616 end = [] 617 for b, e in subRanges: 618 start.append(b) 619 end.append(e) 620 start.pop(0) 621 622 assert len(start) + 1 == len(end) 623 return start, end 624 625 626class cmap_format_4(CmapSubtable): 627 628 def decompile(self, data, ttFont): 629 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 630 # If not, someone is calling the subtable decompile() directly, and must provide both args. 631 if data is not None and ttFont is not None: 632 self.decompileHeader(self.data[offset:offset+int(length)], ttFont) 633 else: 634 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 635 636 data = self.data # decompileHeader assigns the data after the header to self.data 637 (segCountX2, searchRange, entrySelector, rangeShift) = \ 638 struct.unpack(">4H", data[:8]) 639 data = data[8:] 640 segCount = segCountX2 // 2 641 642 allCodes = array.array("H") 643 allCodes.fromstring(data) 644 self.data = data = None 645 646 if sys.byteorder != "big": 647 allCodes.byteswap() 648 649 # divide the data 650 endCode = allCodes[:segCount] 651 allCodes = allCodes[segCount+1:] # the +1 is skipping the reservedPad field 652 startCode = allCodes[:segCount] 653 allCodes = allCodes[segCount:] 654 idDelta = allCodes[:segCount] 655 allCodes = allCodes[segCount:] 656 idRangeOffset = allCodes[:segCount] 657 glyphIndexArray = allCodes[segCount:] 658 lenGIArray = len(glyphIndexArray) 659 660 # build 2-byte character mapping 661 charCodes = [] 662 gids = [] 663 for i in range(len(startCode) - 1): # don't do 0xffff! 664 rangeCharCodes = list(range(startCode[i], endCode[i] + 1)) 665 charCodes = charCodes + rangeCharCodes 666 for charCode in rangeCharCodes: 667 rangeOffset = idRangeOffset[i] 668 if rangeOffset == 0: 669 glyphID = charCode + idDelta[i] 670 else: 671 # *someone* needs to get killed. 672 index = idRangeOffset[i] // 2 + (charCode - startCode[i]) + i - len(idRangeOffset) 673 assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray) 674 if glyphIndexArray[index] != 0: # if not missing glyph 675 glyphID = glyphIndexArray[index] + idDelta[i] 676 else: 677 glyphID = 0 # missing glyph 678 gids.append(glyphID % 0x10000) 679 680 self.cmap = cmap = {} 681 lenCmap = len(gids) 682 glyphOrder = self.ttFont.getGlyphOrder() 683 try: 684 names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids )) 685 except IndexError: 686 getGlyphName = self.ttFont.getGlyphName 687 names = list(map(getGlyphName, gids )) 688 list(map(operator.setitem, [cmap]*lenCmap, charCodes, names)) 689 690 691 692 def setIDDelta(self, idDelta): 693 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 694 # idDelta is a short, and must be between -32K and 32K 695 # startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1 696 # This means that we have a problem because we can need to assign to idDelta values 697 # between -(64K-2) and 64K -1. 698 # Since the final gi is reconstructed from the glyphArray GID by: 699 # (short)finalGID = (gid + idDelta) % 0x10000), 700 # we can get from a startCode of 0 to a final GID of 64 -1K by subtracting 1, and casting the 701 # negative number to an unsigned short. 702 # Similarly , we can get from a startCode of 64K-1 to a final GID of 1 by adding 2, because of 703 # the modulo arithmetic. 704 705 if idDelta > 0x7FFF: 706 idDelta = idDelta - 0x10000 707 elif idDelta < -0x7FFF: 708 idDelta = idDelta + 0x10000 709 710 return idDelta 711 712 713 def compile(self, ttFont): 714 if self.data: 715 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 716 717 from fontTools.ttLib.sfnt import maxPowerOfTwo 718 719 charCodes = list(self.cmap.keys()) 720 lenCharCodes = len(charCodes) 721 if lenCharCodes == 0: 722 startCode = [0xffff] 723 endCode = [0xffff] 724 else: 725 charCodes.sort() 726 names = list(map(operator.getitem, [self.cmap]*lenCharCodes, charCodes)) 727 nameMap = ttFont.getReverseGlyphMap() 728 try: 729 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 730 except KeyError: 731 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 732 try: 733 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 734 except KeyError: 735 # allow virtual GIDs in format 4 tables 736 gids = [] 737 for name in names: 738 try: 739 gid = nameMap[name] 740 except KeyError: 741 try: 742 if (name[:3] == 'gid'): 743 gid = eval(name[3:]) 744 else: 745 gid = ttFont.getGlyphID(name) 746 except: 747 raise KeyError(name) 748 749 gids.append(gid) 750 cmap = {} # code:glyphID mapping 751 list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids)) 752 753 # Build startCode and endCode lists. 754 # Split the char codes in ranges of consecutive char codes, then split 755 # each range in more ranges of consecutive/not consecutive glyph IDs. 756 # See splitRange(). 757 lastCode = charCodes[0] 758 endCode = [] 759 startCode = [lastCode] 760 for charCode in charCodes[1:]: # skip the first code, it's the first start code 761 if charCode == lastCode + 1: 762 lastCode = charCode 763 continue 764 start, end = splitRange(startCode[-1], lastCode, cmap) 765 startCode.extend(start) 766 endCode.extend(end) 767 startCode.append(charCode) 768 lastCode = charCode 769 endCode.append(lastCode) 770 startCode.append(0xffff) 771 endCode.append(0xffff) 772 773 # build up rest of cruft 774 idDelta = [] 775 idRangeOffset = [] 776 glyphIndexArray = [] 777 for i in range(len(endCode)-1): # skip the closing codes (0xffff) 778 indices = [] 779 for charCode in range(startCode[i], endCode[i] + 1): 780 indices.append(cmap[charCode]) 781 if (indices == list(range(indices[0], indices[0] + len(indices)))): 782 idDeltaTemp = self.setIDDelta(indices[0] - startCode[i]) 783 idDelta.append( idDeltaTemp) 784 idRangeOffset.append(0) 785 else: 786 # someone *definitely* needs to get killed. 787 idDelta.append(0) 788 idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i)) 789 glyphIndexArray.extend(indices) 790 idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef 791 idRangeOffset.append(0) 792 793 # Insane. 794 segCount = len(endCode) 795 segCountX2 = segCount * 2 796 maxExponent = maxPowerOfTwo(segCount) 797 searchRange = 2 * (2 ** maxExponent) 798 entrySelector = maxExponent 799 rangeShift = 2 * segCount - searchRange 800 801 charCodeArray = array.array("H", endCode + [0] + startCode) 802 idDeltaeArray = array.array("h", idDelta) 803 restArray = array.array("H", idRangeOffset + glyphIndexArray) 804 if sys.byteorder != "big": 805 charCodeArray.byteswap() 806 idDeltaeArray.byteswap() 807 restArray.byteswap() 808 data = charCodeArray.tostring() + idDeltaeArray.tostring() + restArray.tostring() 809 810 length = struct.calcsize(cmap_format_4_format) + len(data) 811 header = struct.pack(cmap_format_4_format, self.format, length, self.language, 812 segCountX2, searchRange, entrySelector, rangeShift) 813 return header + data 814 815 def fromXML(self, name, attrs, content, ttFont): 816 self.language = safeEval(attrs["language"]) 817 if not hasattr(self, "cmap"): 818 self.cmap = {} 819 cmap = self.cmap 820 821 for element in content: 822 if not isinstance(element, tuple): 823 continue 824 nameMap, attrsMap, dummyContent = element 825 if nameMap != "map": 826 assert 0, "Unrecognized keyword in cmap subtable" 827 cmap[safeEval(attrsMap["code"])] = attrsMap["name"] 828 829 830class cmap_format_6(CmapSubtable): 831 832 def decompile(self, data, ttFont): 833 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 834 # If not, someone is calling the subtable decompile() directly, and must provide both args. 835 if data is not None and ttFont is not None: 836 self.decompileHeader(data[offset:offset+int(length)], ttFont) 837 else: 838 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 839 840 data = self.data # decompileHeader assigns the data after the header to self.data 841 firstCode, entryCount = struct.unpack(">HH", data[:4]) 842 firstCode = int(firstCode) 843 data = data[4:] 844 #assert len(data) == 2 * entryCount # XXX not true in Apple's Helvetica!!! 845 glyphIndexArray = array.array("H") 846 glyphIndexArray.fromstring(data[:2 * int(entryCount)]) 847 if sys.byteorder != "big": 848 glyphIndexArray.byteswap() 849 self.data = data = None 850 851 self.cmap = cmap = {} 852 853 lenArray = len(glyphIndexArray) 854 charCodes = list(range(firstCode, firstCode + lenArray)) 855 glyphOrder = self.ttFont.getGlyphOrder() 856 try: 857 names = list(map(operator.getitem, [glyphOrder]*lenArray, glyphIndexArray )) 858 except IndexError: 859 getGlyphName = self.ttFont.getGlyphName 860 names = list(map(getGlyphName, glyphIndexArray )) 861 list(map(operator.setitem, [cmap]*lenArray, charCodes, names)) 862 863 def compile(self, ttFont): 864 if self.data: 865 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 866 cmap = self.cmap 867 codes = list(cmap.keys()) 868 if codes: # yes, there are empty cmap tables. 869 codes = list(range(codes[0], codes[-1] + 1)) 870 firstCode = codes[0] 871 valueList = [cmap.get(code, ".notdef") for code in codes] 872 valueList = map(ttFont.getGlyphID, valueList) 873 glyphIndexArray = array.array("H", valueList) 874 if sys.byteorder != "big": 875 glyphIndexArray.byteswap() 876 data = glyphIndexArray.tostring() 877 else: 878 data = b"" 879 firstCode = 0 880 header = struct.pack(">HHHHH", 881 6, len(data) + 10, self.language, firstCode, len(codes)) 882 return header + data 883 884 def fromXML(self, name, attrs, content, ttFont): 885 self.language = safeEval(attrs["language"]) 886 if not hasattr(self, "cmap"): 887 self.cmap = {} 888 cmap = self.cmap 889 890 for element in content: 891 if not isinstance(element, tuple): 892 continue 893 name, attrs, content = element 894 if name != "map": 895 continue 896 cmap[safeEval(attrs["code"])] = attrs["name"] 897 898 899class cmap_format_12_or_13(CmapSubtable): 900 901 def __init__(self, format): 902 self.format = format 903 self.reserved = 0 904 self.data = None 905 self.ttFont = None 906 907 def decompileHeader(self, data, ttFont): 908 format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16]) 909 assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length) 910 self.format = format 911 self.reserved = reserved 912 self.length = length 913 self.language = language 914 self.nGroups = nGroups 915 self.data = data[16:] 916 self.ttFont = ttFont 917 918 def decompile(self, data, ttFont): 919 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 920 # If not, someone is calling the subtable decompile() directly, and must provide both args. 921 if data is not None and ttFont is not None: 922 self.decompileHeader(data[offset:offset+int(length)], ttFont) 923 else: 924 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 925 926 data = self.data # decompileHeader assigns the data after the header to self.data 927 charCodes = [] 928 gids = [] 929 pos = 0 930 for i in range(self.nGroups): 931 startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] ) 932 pos += 12 933 lenGroup = 1 + endCharCode - startCharCode 934 charCodes += list(range(startCharCode, endCharCode +1)) 935 gids += self._computeGIDs(glyphID, lenGroup) 936 self.data = data = None 937 self.cmap = cmap = {} 938 lenCmap = len(gids) 939 glyphOrder = self.ttFont.getGlyphOrder() 940 try: 941 names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids )) 942 except IndexError: 943 getGlyphName = self.ttFont.getGlyphName 944 names = list(map(getGlyphName, gids )) 945 list(map(operator.setitem, [cmap]*lenCmap, charCodes, names)) 946 947 def compile(self, ttFont): 948 if self.data: 949 return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data 950 charCodes = list(self.cmap.keys()) 951 lenCharCodes = len(charCodes) 952 names = list(self.cmap.values()) 953 nameMap = ttFont.getReverseGlyphMap() 954 try: 955 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 956 except KeyError: 957 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 958 try: 959 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 960 except KeyError: 961 # allow virtual GIDs in format 12 tables 962 gids = [] 963 for name in names: 964 try: 965 gid = nameMap[name] 966 except KeyError: 967 try: 968 if (name[:3] == 'gid'): 969 gid = eval(name[3:]) 970 else: 971 gid = ttFont.getGlyphID(name) 972 except: 973 raise KeyError(name) 974 975 gids.append(gid) 976 977 cmap = {} # code:glyphID mapping 978 list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids)) 979 980 charCodes.sort() 981 index = 0 982 startCharCode = charCodes[0] 983 startGlyphID = cmap[startCharCode] 984 lastGlyphID = startGlyphID - self._format_step 985 lastCharCode = startCharCode - 1 986 nGroups = 0 987 dataList = [] 988 maxIndex = len(charCodes) 989 for index in range(maxIndex): 990 charCode = charCodes[index] 991 glyphID = cmap[charCode] 992 if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode): 993 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 994 startCharCode = charCode 995 startGlyphID = glyphID 996 nGroups = nGroups + 1 997 lastGlyphID = glyphID 998 lastCharCode = charCode 999 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 1000 nGroups = nGroups + 1 1001 data = bytesjoin(dataList) 1002 lengthSubtable = len(data) +16 1003 assert len(data) == (nGroups*12) == (lengthSubtable-16) 1004 return struct.pack(">HHLLL", self.format, self.reserved , lengthSubtable, self.language, nGroups) + data 1005 1006 def toXML(self, writer, ttFont): 1007 writer.begintag(self.__class__.__name__, [ 1008 ("platformID", self.platformID), 1009 ("platEncID", self.platEncID), 1010 ("format", self.format), 1011 ("reserved", self.reserved), 1012 ("length", self.length), 1013 ("language", self.language), 1014 ("nGroups", self.nGroups), 1015 ]) 1016 writer.newline() 1017 codes = sorted(self.cmap.items()) 1018 self._writeCodes(codes, writer) 1019 writer.endtag(self.__class__.__name__) 1020 writer.newline() 1021 1022 def fromXML(self, name, attrs, content, ttFont): 1023 self.format = safeEval(attrs["format"]) 1024 self.reserved = safeEval(attrs["reserved"]) 1025 self.length = safeEval(attrs["length"]) 1026 self.language = safeEval(attrs["language"]) 1027 self.nGroups = safeEval(attrs["nGroups"]) 1028 if not hasattr(self, "cmap"): 1029 self.cmap = {} 1030 cmap = self.cmap 1031 1032 for element in content: 1033 if not isinstance(element, tuple): 1034 continue 1035 name, attrs, content = element 1036 if name != "map": 1037 continue 1038 cmap[safeEval(attrs["code"])] = attrs["name"] 1039 1040 1041class cmap_format_12(cmap_format_12_or_13): 1042 def __init__(self, format): 1043 cmap_format_12_or_13.__init__(self, format) 1044 self._format_step = 1 1045 1046 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1047 return list(range(startingGlyph, startingGlyph + numberOfGlyphs)) 1048 1049 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1050 return (glyphID == 1 + lastGlyphID) and (charCode == 1 + lastCharCode) 1051 1052 1053class cmap_format_13(cmap_format_12_or_13): 1054 def __init__(self, format): 1055 cmap_format_12_or_13.__init__(self, format) 1056 self._format_step = 0 1057 1058 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1059 return [startingGlyph] * numberOfGlyphs 1060 1061 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1062 return (glyphID == lastGlyphID) and (charCode == 1 + lastCharCode) 1063 1064 1065def cvtToUVS(threeByteString): 1066 data = b"\0" + threeByteString 1067 val, = struct.unpack(">L", data) 1068 return val 1069 1070def cvtFromUVS(val): 1071 assert 0 <= val < 0x1000000 1072 fourByteString = struct.pack(">L", val) 1073 return fourByteString[1:] 1074 1075 1076class cmap_format_14(CmapSubtable): 1077 1078 def decompileHeader(self, data, ttFont): 1079 format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10]) 1080 self.data = data[10:] 1081 self.length = length 1082 self.numVarSelectorRecords = numVarSelectorRecords 1083 self.ttFont = ttFont 1084 self.language = 0xFF # has no language. 1085 1086 def decompile(self, data, ttFont): 1087 if data is not None and ttFont is not None and ttFont.lazy: 1088 self.decompileHeader(data, ttFont) 1089 else: 1090 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 1091 data = self.data 1092 1093 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail. 1094 uvsDict = {} 1095 recOffset = 0 1096 for n in range(self.numVarSelectorRecords): 1097 uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11]) 1098 recOffset += 11 1099 varUVS = cvtToUVS(uvs) 1100 if defOVSOffset: 1101 startOffset = defOVSOffset - 10 1102 numValues, = struct.unpack(">L", data[startOffset:startOffset+4]) 1103 startOffset +=4 1104 for r in range(numValues): 1105 uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4]) 1106 startOffset += 4 1107 firstBaseUV = cvtToUVS(uv) 1108 cnt = addtlCnt+1 1109 baseUVList = list(range(firstBaseUV, firstBaseUV+cnt)) 1110 glyphList = [None]*cnt 1111 localUVList = zip(baseUVList, glyphList) 1112 try: 1113 uvsDict[varUVS].extend(localUVList) 1114 except KeyError: 1115 uvsDict[varUVS] = list(localUVList) 1116 1117 if nonDefUVSOffset: 1118 startOffset = nonDefUVSOffset - 10 1119 numRecs, = struct.unpack(">L", data[startOffset:startOffset+4]) 1120 startOffset +=4 1121 localUVList = [] 1122 for r in range(numRecs): 1123 uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5]) 1124 startOffset += 5 1125 uv = cvtToUVS(uv) 1126 glyphName = self.ttFont.getGlyphName(gid) 1127 localUVList.append( [uv, glyphName] ) 1128 try: 1129 uvsDict[varUVS].extend(localUVList) 1130 except KeyError: 1131 uvsDict[varUVS] = localUVList 1132 1133 self.uvsDict = uvsDict 1134 1135 def toXML(self, writer, ttFont): 1136 writer.begintag(self.__class__.__name__, [ 1137 ("platformID", self.platformID), 1138 ("platEncID", self.platEncID), 1139 ("format", self.format), 1140 ("length", self.length), 1141 ("numVarSelectorRecords", self.numVarSelectorRecords), 1142 ]) 1143 writer.newline() 1144 uvsDict = self.uvsDict 1145 uvsList = sorted(uvsDict.keys()) 1146 for uvs in uvsList: 1147 uvList = uvsDict[uvs] 1148 uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1])) 1149 for uv, gname in uvList: 1150 if gname is None: 1151 gname = "None" 1152 # I use the arg rather than th keyword syntax in order to preserve the attribute order. 1153 writer.simpletag("map", [ ("uvs",hex(uvs)), ("uv",hex(uv)), ("name", gname)] ) 1154 writer.newline() 1155 writer.endtag(self.__class__.__name__) 1156 writer.newline() 1157 1158 def fromXML(self, name, attrs, content, ttFont): 1159 self.format = safeEval(attrs["format"]) 1160 self.length = safeEval(attrs["length"]) 1161 self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"]) 1162 self.language = 0xFF # provide a value so that CmapSubtable.__lt__() won't fail 1163 if not hasattr(self, "cmap"): 1164 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail. 1165 if not hasattr(self, "uvsDict"): 1166 self.uvsDict = {} 1167 uvsDict = self.uvsDict 1168 1169 for element in content: 1170 if not isinstance(element, tuple): 1171 continue 1172 name, attrs, content = element 1173 if name != "map": 1174 continue 1175 uvs = safeEval(attrs["uvs"]) 1176 uv = safeEval(attrs["uv"]) 1177 gname = attrs["name"] 1178 if gname == "None": 1179 gname = None 1180 try: 1181 uvsDict[uvs].append( [uv, gname]) 1182 except KeyError: 1183 uvsDict[uvs] = [ [uv, gname] ] 1184 1185 1186 def compile(self, ttFont): 1187 if self.data: 1188 return struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) + self.data 1189 1190 uvsDict = self.uvsDict 1191 uvsList = sorted(uvsDict.keys()) 1192 self.numVarSelectorRecords = len(uvsList) 1193 offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block. 1194 data = [] 1195 varSelectorRecords =[] 1196 for uvs in uvsList: 1197 entryList = uvsDict[uvs] 1198 1199 defList = [entry for entry in entryList if entry[1] is None] 1200 if defList: 1201 defList = [entry[0] for entry in defList] 1202 defOVSOffset = offset 1203 defList.sort() 1204 1205 lastUV = defList[0] 1206 cnt = -1 1207 defRecs = [] 1208 for defEntry in defList: 1209 cnt +=1 1210 if (lastUV+cnt) != defEntry: 1211 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1) 1212 lastUV = defEntry 1213 defRecs.append(rec) 1214 cnt = 0 1215 1216 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt) 1217 defRecs.append(rec) 1218 1219 numDefRecs = len(defRecs) 1220 data.append(struct.pack(">L", numDefRecs)) 1221 data.extend(defRecs) 1222 offset += 4 + numDefRecs*4 1223 else: 1224 defOVSOffset = 0 1225 1226 ndefList = [entry for entry in entryList if entry[1] is not None] 1227 if ndefList: 1228 nonDefUVSOffset = offset 1229 ndefList.sort() 1230 numNonDefRecs = len(ndefList) 1231 data.append(struct.pack(">L", numNonDefRecs)) 1232 offset += 4 + numNonDefRecs*5 1233 1234 for uv, gname in ndefList: 1235 gid = ttFont.getGlyphID(gname) 1236 ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid) 1237 data.append(ndrec) 1238 else: 1239 nonDefUVSOffset = 0 1240 1241 vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset) 1242 varSelectorRecords.append(vrec) 1243 1244 data = bytesjoin(varSelectorRecords) + bytesjoin(data) 1245 self.length = 10 + len(data) 1246 headerdata = struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) 1247 self.data = headerdata + data 1248 1249 return self.data 1250 1251 1252class cmap_format_unknown(CmapSubtable): 1253 1254 def toXML(self, writer, ttFont): 1255 cmapName = self.__class__.__name__[:12] + str(self.format) 1256 writer.begintag(cmapName, [ 1257 ("platformID", self.platformID), 1258 ("platEncID", self.platEncID), 1259 ]) 1260 writer.newline() 1261 writer.dumphex(self.data) 1262 writer.endtag(cmapName) 1263 writer.newline() 1264 1265 def fromXML(self, name, attrs, content, ttFont): 1266 self.data = readHex(content) 1267 self.cmap = {} 1268 1269 def decompileHeader(self, data, ttFont): 1270 self.language = 0 # dummy value 1271 self.data = data 1272 1273 def decompile(self, data, ttFont): 1274 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 1275 # If not, someone is calling the subtable decompile() directly, and must provide both args. 1276 if data is not None and ttFont is not None: 1277 self.decompileHeader(data[offset:offset+int(length)], ttFont) 1278 else: 1279 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 1280 1281 def compile(self, ttFont): 1282 if self.data: 1283 return self.data 1284 else: 1285 return None 1286 1287cmap_classes = { 1288 0: cmap_format_0, 1289 2: cmap_format_2, 1290 4: cmap_format_4, 1291 6: cmap_format_6, 1292 12: cmap_format_12, 1293 13: cmap_format_13, 1294 14: cmap_format_14, 1295 } 1296