_c_m_a_p.py revision 0d182bfb8078665313280db759b782c3144f65fa
1from __future__ import print_function, division, absolute_import 2from fontTools.misc.py23 import * 3from fontTools.misc.textTools import safeEval, readHex 4from fontTools.unicode import Unicode 5from . import DefaultTable 6import sys 7import struct 8import array 9import operator 10 11 12class table__c_m_a_p(DefaultTable.DefaultTable): 13 14 def getcmap(self, platformID, platEncID): 15 for subtable in self.tables: 16 if (subtable.platformID == platformID and 17 subtable.platEncID == platEncID): 18 return subtable 19 return None # not found 20 21 def decompile(self, data, ttFont): 22 tableVersion, numSubTables = struct.unpack(">HH", data[:4]) 23 self.tableVersion = int(tableVersion) 24 self.tables = tables = [] 25 seenOffsets = {} 26 for i in range(numSubTables): 27 platformID, platEncID, offset = struct.unpack( 28 ">HHl", data[4+i*8:4+(i+1)*8]) 29 platformID, platEncID = int(platformID), int(platEncID) 30 format, length = struct.unpack(">HH", data[offset:offset+4]) 31 if format in [8,10,12,13]: 32 format, reserved, length = struct.unpack(">HHL", data[offset:offset+8]) 33 elif format in [14]: 34 format, length = struct.unpack(">HL", data[offset:offset+6]) 35 36 if not length: 37 print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s, format %s offset %s. Skipping table." % (platformID, platEncID,format, offset)) 38 continue 39 if format not in cmap_classes: 40 table = cmap_format_unknown(format) 41 else: 42 table = cmap_classes[format](format) 43 table.platformID = platformID 44 table.platEncID = platEncID 45 # Note that by default we decompile only the subtable header info; 46 # any other data gets decompiled only when an attribute of the 47 # subtable is referenced. 48 table.decompileHeader(data[offset:offset+int(length)], ttFont) 49 if offset in seenOffsets: 50 table.cmap = tables[seenOffsets[offset]].cmap 51 else: 52 seenOffsets[offset] = i 53 tables.append(table) 54 55 def compile(self, ttFont): 56 self.tables.sort() # sort according to the spec; see CmapSubtable.__lt__() 57 numSubTables = len(self.tables) 58 totalOffset = 4 + 8 * numSubTables 59 data = struct.pack(">HH", self.tableVersion, numSubTables) 60 tableData = b"" 61 seen = {} # Some tables are the same object reference. Don't compile them twice. 62 done = {} # Some tables are different objects, but compile to the same data chunk 63 for table in self.tables: 64 try: 65 offset = seen[id(table.cmap)] 66 except KeyError: 67 chunk = table.compile(ttFont) 68 if chunk in done: 69 offset = done[chunk] 70 else: 71 offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData) 72 tableData = tableData + chunk 73 data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset) 74 return data + tableData 75 76 def toXML(self, writer, ttFont): 77 writer.simpletag("tableVersion", version=self.tableVersion) 78 writer.newline() 79 for table in self.tables: 80 table.toXML(writer, ttFont) 81 82 def fromXML(self, name, attrs, content, ttFont): 83 if name == "tableVersion": 84 self.tableVersion = safeEval(attrs["version"]) 85 return 86 if name[:12] != "cmap_format_": 87 return 88 if not hasattr(self, "tables"): 89 self.tables = [] 90 format = safeEval(name[12:]) 91 if format not in cmap_classes: 92 table = cmap_format_unknown(format) 93 else: 94 table = cmap_classes[format](format) 95 table.platformID = safeEval(attrs["platformID"]) 96 table.platEncID = safeEval(attrs["platEncID"]) 97 table.fromXML(name, attrs, content, ttFont) 98 self.tables.append(table) 99 100 101class CmapSubtable(object): 102 103 def __init__(self, format): 104 self.format = format 105 self.data = None 106 self.ttFont = None 107 108 def __getattr__(self, attr): 109 # allow lazy decompilation of subtables. 110 if attr[:2] == '__': # don't handle requests for member functions like '__lt__' 111 raise AttributeError(attr) 112 if self.data is None: 113 raise AttributeError(attr) 114 self.decompile(None, None) # use saved data. 115 self.data = None # Once this table has been decompiled, make sure we don't 116 # just return the original data. Also avoids recursion when 117 # called with an attribute that the cmap subtable doesn't have. 118 return getattr(self, attr) 119 120 def decompileHeader(self, data, ttFont): 121 format, length, language = struct.unpack(">HHH", data[:6]) 122 assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length) 123 self.format = int(format) 124 self.length = int(length) 125 self.language = int(language) 126 self.data = data[6:] 127 self.ttFont = ttFont 128 129 def toXML(self, writer, ttFont): 130 writer.begintag(self.__class__.__name__, [ 131 ("platformID", self.platformID), 132 ("platEncID", self.platEncID), 133 ("language", self.language), 134 ]) 135 writer.newline() 136 codes = sorted(self.cmap.items()) 137 self._writeCodes(codes, writer) 138 writer.endtag(self.__class__.__name__) 139 writer.newline() 140 141 def isUnicode(self): 142 return (self.platformID == 0 or 143 (self.platformID == 3 and self.platEncID in [1, 10])) 144 145 def isSymbol(self): 146 return self.platformID == 3 and self.platEncID == 0 147 148 def _writeCodes(self, codes, writer): 149 isUnicode = self.isUnicode() 150 for code, name in codes: 151 writer.simpletag("map", code=hex(code), name=name) 152 if isUnicode: 153 writer.comment(Unicode[code]) 154 writer.newline() 155 156 def __lt__(self, other): 157 if not isinstance(other, CmapSubtable): 158 return NotImplemented 159 160 # implemented so that list.sort() sorts according to the spec. 161 selfTuple = ( 162 getattr(self, "platformID", None), 163 getattr(self, "platEncID", None), 164 getattr(self, "language", None), 165 self.__dict__) 166 otherTuple = ( 167 getattr(other, "platformID", None), 168 getattr(other, "platEncID", None), 169 getattr(other, "language", None), 170 other.__dict__) 171 return selfTuple < otherTuple 172 173 174class cmap_format_0(CmapSubtable): 175 176 def decompile(self, data, ttFont): 177 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 178 # If not, someone is calling the subtable decompile() directly, and must provide both args. 179 if data is not None and ttFont is not None: 180 self.decompileHeader(data[offset:offset+int(length)], ttFont) 181 else: 182 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 183 data = self.data # decompileHeader assigns the data after the header to self.data 184 assert 262 == self.length, "Format 0 cmap subtable not 262 bytes" 185 glyphIdArray = array.array("B") 186 glyphIdArray.fromstring(self.data) 187 self.cmap = cmap = {} 188 lenArray = len(glyphIdArray) 189 charCodes = list(range(lenArray)) 190 names = map(self.ttFont.getGlyphName, glyphIdArray) 191 list(map(operator.setitem, [cmap]*lenArray, charCodes, names)) 192 193 194 def compile(self, ttFont): 195 if self.data: 196 return struct.pack(">HHH", 0, 262, self.language) + self.data 197 198 charCodeList = sorted(self.cmap.items()) 199 charCodes = [entry[0] for entry in charCodeList] 200 valueList = [entry[1] for entry in charCodeList] 201 assert charCodes == list(range(256)) 202 valueList = map(ttFont.getGlyphID, valueList) 203 204 glyphIdArray = array.array("B", valueList) 205 data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring() 206 assert len(data) == 262 207 return data 208 209 def fromXML(self, name, attrs, content, ttFont): 210 self.language = safeEval(attrs["language"]) 211 if not hasattr(self, "cmap"): 212 self.cmap = {} 213 cmap = self.cmap 214 for element in content: 215 if not isinstance(element, tuple): 216 continue 217 name, attrs, content = element 218 if name != "map": 219 continue 220 cmap[safeEval(attrs["code"])] = attrs["name"] 221 222 223subHeaderFormat = ">HHhH" 224class SubHeader(object): 225 def __init__(self): 226 self.firstCode = None 227 self.entryCount = None 228 self.idDelta = None 229 self.idRangeOffset = None 230 self.glyphIndexArray = [] 231 232class cmap_format_2(CmapSubtable): 233 234 def setIDDelta(self, subHeader): 235 subHeader.idDelta = 0 236 # find the minGI which is not zero. 237 minGI = subHeader.glyphIndexArray[0] 238 for gid in subHeader.glyphIndexArray: 239 if (gid != 0) and (gid < minGI): 240 minGI = gid 241 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 242 # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K. 243 # We would like to pick an idDelta such that the first glyphArray GID is 1, 244 # so that we are more likely to be able to combine glypharray GID subranges. 245 # This means that we have a problem when minGI is > 32K 246 # Since the final gi is reconstructed from the glyphArray GID by: 247 # (short)finalGID = (gid + idDelta) % 0x10000), 248 # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the 249 # negative number to an unsigned short. 250 251 if (minGI > 1): 252 if minGI > 0x7FFF: 253 subHeader.idDelta = -(0x10000 - minGI) -1 254 else: 255 subHeader.idDelta = minGI -1 256 idDelta = subHeader.idDelta 257 for i in range(subHeader.entryCount): 258 gid = subHeader.glyphIndexArray[i] 259 if gid > 0: 260 subHeader.glyphIndexArray[i] = gid - idDelta 261 262 263 def decompile(self, data, ttFont): 264 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 265 # If not, someone is calling the subtable decompile() directly, and must provide both args. 266 if data is not None and ttFont is not None: 267 self.decompileHeader(data[offset:offset+int(length)], ttFont) 268 else: 269 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 270 271 data = self.data # decompileHeader assigns the data after the header to self.data 272 subHeaderKeys = [] 273 maxSubHeaderindex = 0 274 # get the key array, and determine the number of subHeaders. 275 allKeys = array.array("H") 276 allKeys.fromstring(data[:512]) 277 data = data[512:] 278 if sys.byteorder != "big": 279 allKeys.byteswap() 280 subHeaderKeys = [ key//8 for key in allKeys] 281 maxSubHeaderindex = max(subHeaderKeys) 282 283 #Load subHeaders 284 subHeaderList = [] 285 pos = 0 286 for i in range(maxSubHeaderindex + 1): 287 subHeader = SubHeader() 288 (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \ 289 subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8]) 290 pos += 8 291 giDataPos = pos + subHeader.idRangeOffset-2 292 giList = array.array("H") 293 giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount*2]) 294 if sys.byteorder != "big": 295 giList.byteswap() 296 subHeader.glyphIndexArray = giList 297 subHeaderList.append(subHeader) 298 # How this gets processed. 299 # Charcodes may be one or two bytes. 300 # The first byte of a charcode is mapped through the subHeaderKeys, to select 301 # a subHeader. For any subheader but 0, the next byte is then mapped through the 302 # selected subheader. If subheader Index 0 is selected, then the byte itself is 303 # mapped through the subheader, and there is no second byte. 304 # Then assume that the subsequent byte is the first byte of the next charcode,and repeat. 305 # 306 # Each subheader references a range in the glyphIndexArray whose length is entryCount. 307 # The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray 308 # referenced by another subheader. 309 # The only subheader that will be referenced by more than one first-byte value is the subheader 310 # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef: 311 # {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx} 312 # A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex. 313 # A subheader specifies a subrange within (0...256) by the 314 # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero 315 # (e.g. glyph not in font). 316 # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar). 317 # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by 318 # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the 319 # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex. 320 # Example for Logocut-Medium 321 # first byte of charcode = 129; selects subheader 1. 322 # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252} 323 # second byte of charCode = 66 324 # the index offset = 66-64 = 2. 325 # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is: 326 # [glyphIndexArray index], [subrange array index] = glyphIndex 327 # [256], [0]=1 from charcode [129, 64] 328 # [257], [1]=2 from charcode [129, 65] 329 # [258], [2]=3 from charcode [129, 66] 330 # [259], [3]=4 from charcode [129, 67] 331 # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero, 332 # add it to the glyphID to get the final glyphIndex 333 # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew! 334 335 self.data = b"" 336 self.cmap = cmap = {} 337 notdefGI = 0 338 for firstByte in range(256): 339 subHeadindex = subHeaderKeys[firstByte] 340 subHeader = subHeaderList[subHeadindex] 341 if subHeadindex == 0: 342 if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount): 343 continue # gi is notdef. 344 else: 345 charCode = firstByte 346 offsetIndex = firstByte - subHeader.firstCode 347 gi = subHeader.glyphIndexArray[offsetIndex] 348 if gi != 0: 349 gi = (gi + subHeader.idDelta) % 0x10000 350 else: 351 continue # gi is notdef. 352 cmap[charCode] = gi 353 else: 354 if subHeader.entryCount: 355 charCodeOffset = firstByte * 256 + subHeader.firstCode 356 for offsetIndex in range(subHeader.entryCount): 357 charCode = charCodeOffset + offsetIndex 358 gi = subHeader.glyphIndexArray[offsetIndex] 359 if gi != 0: 360 gi = (gi + subHeader.idDelta) % 0x10000 361 else: 362 continue 363 cmap[charCode] = gi 364 # If not subHeader.entryCount, then all char codes with this first byte are 365 # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the 366 # same as mapping it to .notdef. 367 # cmap values are GID's. 368 glyphOrder = self.ttFont.getGlyphOrder() 369 gids = list(cmap.values()) 370 charCodes = list(cmap.keys()) 371 lenCmap = len(gids) 372 try: 373 names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids )) 374 except IndexError: 375 getGlyphName = self.ttFont.getGlyphName 376 names = list(map(getGlyphName, gids )) 377 list(map(operator.setitem, [cmap]*lenCmap, charCodes, names)) 378 379 380 def compile(self, ttFont): 381 if self.data: 382 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 383 kEmptyTwoCharCodeRange = -1 384 notdefGI = 0 385 386 items = sorted(self.cmap.items()) 387 charCodes = [item[0] for item in items] 388 names = [item[1] for item in items] 389 nameMap = ttFont.getReverseGlyphMap() 390 lenCharCodes = len(charCodes) 391 try: 392 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 393 except KeyError: 394 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 395 try: 396 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 397 except KeyError: 398 # allow virtual GIDs in format 2 tables 399 gids = [] 400 for name in names: 401 try: 402 gid = nameMap[name] 403 except KeyError: 404 try: 405 if (name[:3] == 'gid'): 406 gid = eval(name[3:]) 407 else: 408 gid = ttFont.getGlyphID(name) 409 except: 410 raise KeyError(name) 411 412 gids.append(gid) 413 414 # Process the (char code to gid) item list in char code order. 415 # By definition, all one byte char codes map to subheader 0. 416 # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0, 417 # which defines all char codes in its range to map to notdef) unless proven otherwise. 418 # Note that since the char code items are processed in char code order, all the char codes with the 419 # same first byte are in sequential order. 420 421 subHeaderKeys = [ kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList. 422 subHeaderList = [] 423 424 # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up 425 # with a cmap where all the one byte char codes map to notdef, 426 # with the result that the subhead 0 would not get created just by processing the item list. 427 charCode = charCodes[0] 428 if charCode > 255: 429 subHeader = SubHeader() 430 subHeader.firstCode = 0 431 subHeader.entryCount = 0 432 subHeader.idDelta = 0 433 subHeader.idRangeOffset = 0 434 subHeaderList.append(subHeader) 435 436 437 lastFirstByte = -1 438 items = zip(charCodes, gids) 439 for charCode, gid in items: 440 if gid == 0: 441 continue 442 firstbyte = charCode >> 8 443 secondByte = charCode & 0x00FF 444 445 if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one. 446 if lastFirstByte > -1: 447 # fix GI's and iDelta of current subheader. 448 self.setIDDelta(subHeader) 449 450 # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero 451 # for the indices matching the char codes. 452 if lastFirstByte == 0: 453 for index in range(subHeader.entryCount): 454 charCode = subHeader.firstCode + index 455 subHeaderKeys[charCode] = 0 456 457 assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange." 458 # init new subheader 459 subHeader = SubHeader() 460 subHeader.firstCode = secondByte 461 subHeader.entryCount = 1 462 subHeader.glyphIndexArray.append(gid) 463 subHeaderList.append(subHeader) 464 subHeaderKeys[firstbyte] = len(subHeaderList) -1 465 lastFirstByte = firstbyte 466 else: 467 # need to fill in with notdefs all the code points between the last charCode and the current charCode. 468 codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount) 469 for i in range(codeDiff): 470 subHeader.glyphIndexArray.append(notdefGI) 471 subHeader.glyphIndexArray.append(gid) 472 subHeader.entryCount = subHeader.entryCount + codeDiff + 1 473 474 # fix GI's and iDelta of last subheader that we we added to the subheader array. 475 self.setIDDelta(subHeader) 476 477 # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges. 478 subHeader = SubHeader() 479 subHeader.firstCode = 0 480 subHeader.entryCount = 0 481 subHeader.idDelta = 0 482 subHeader.idRangeOffset = 2 483 subHeaderList.append(subHeader) 484 emptySubheadIndex = len(subHeaderList) - 1 485 for index in range(256): 486 if subHeaderKeys[index] == kEmptyTwoCharCodeRange: 487 subHeaderKeys[index] = emptySubheadIndex 488 # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the 489 # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray, 490 # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with 491 # charcode 0 and GID 0. 492 493 idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset. 494 subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2. 495 for index in range(subheadRangeLen): 496 subHeader = subHeaderList[index] 497 subHeader.idRangeOffset = 0 498 for j in range(index): 499 prevSubhead = subHeaderList[j] 500 if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray 501 subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8 502 subHeader.glyphIndexArray = [] 503 break 504 if subHeader.idRangeOffset == 0: # didn't find one. 505 subHeader.idRangeOffset = idRangeOffset 506 idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray. 507 else: 508 idRangeOffset = idRangeOffset - 8 # one less subheader 509 510 # Now we can write out the data! 511 length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array. 512 for subhead in subHeaderList[:-1]: 513 length = length + len(subhead.glyphIndexArray)*2 # We can't use subhead.entryCount, as some of the subhead may share subArrays. 514 dataList = [struct.pack(">HHH", 2, length, self.language)] 515 for index in subHeaderKeys: 516 dataList.append(struct.pack(">H", index*8)) 517 for subhead in subHeaderList: 518 dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)) 519 for subhead in subHeaderList[:-1]: 520 for gi in subhead.glyphIndexArray: 521 dataList.append(struct.pack(">H", gi)) 522 data = bytesjoin(dataList) 523 assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length) 524 return data 525 526 527 def fromXML(self, name, attrs, content, ttFont): 528 self.language = safeEval(attrs["language"]) 529 if not hasattr(self, "cmap"): 530 self.cmap = {} 531 cmap = self.cmap 532 533 for element in content: 534 if not isinstance(element, tuple): 535 continue 536 name, attrs, content = element 537 if name != "map": 538 continue 539 cmap[safeEval(attrs["code"])] = attrs["name"] 540 541 542cmap_format_4_format = ">7H" 543 544#uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF. 545#uint16 reservedPad # This value should be zero 546#uint16 startCode[segCount] # Starting character code for each segment 547#uint16 idDelta[segCount] # Delta for all character codes in segment 548#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0 549#uint16 glyphIndexArray[variable] # Glyph index array 550 551def splitRange(startCode, endCode, cmap): 552 # Try to split a range of character codes into subranges with consecutive 553 # glyph IDs in such a way that the cmap4 subtable can be stored "most" 554 # efficiently. I can't prove I've got the optimal solution, but it seems 555 # to do well with the fonts I tested: none became bigger, many became smaller. 556 if startCode == endCode: 557 return [], [endCode] 558 559 lastID = cmap[startCode] 560 lastCode = startCode 561 inOrder = None 562 orderedBegin = None 563 subRanges = [] 564 565 # Gather subranges in which the glyph IDs are consecutive. 566 for code in range(startCode + 1, endCode + 1): 567 glyphID = cmap[code] 568 569 if glyphID - 1 == lastID: 570 if inOrder is None or not inOrder: 571 inOrder = 1 572 orderedBegin = lastCode 573 else: 574 if inOrder: 575 inOrder = 0 576 subRanges.append((orderedBegin, lastCode)) 577 orderedBegin = None 578 579 lastID = glyphID 580 lastCode = code 581 582 if inOrder: 583 subRanges.append((orderedBegin, lastCode)) 584 assert lastCode == endCode 585 586 # Now filter out those new subranges that would only make the data bigger. 587 # A new segment cost 8 bytes, not using a new segment costs 2 bytes per 588 # character. 589 newRanges = [] 590 for b, e in subRanges: 591 if b == startCode and e == endCode: 592 break # the whole range, we're fine 593 if b == startCode or e == endCode: 594 threshold = 4 # split costs one more segment 595 else: 596 threshold = 8 # split costs two more segments 597 if (e - b + 1) > threshold: 598 newRanges.append((b, e)) 599 subRanges = newRanges 600 601 if not subRanges: 602 return [], [endCode] 603 604 if subRanges[0][0] != startCode: 605 subRanges.insert(0, (startCode, subRanges[0][0] - 1)) 606 if subRanges[-1][1] != endCode: 607 subRanges.append((subRanges[-1][1] + 1, endCode)) 608 609 # Fill the "holes" in the segments list -- those are the segments in which 610 # the glyph IDs are _not_ consecutive. 611 i = 1 612 while i < len(subRanges): 613 if subRanges[i-1][1] + 1 != subRanges[i][0]: 614 subRanges.insert(i, (subRanges[i-1][1] + 1, subRanges[i][0] - 1)) 615 i = i + 1 616 i = i + 1 617 618 # Transform the ranges into startCode/endCode lists. 619 start = [] 620 end = [] 621 for b, e in subRanges: 622 start.append(b) 623 end.append(e) 624 start.pop(0) 625 626 assert len(start) + 1 == len(end) 627 return start, end 628 629 630class cmap_format_4(CmapSubtable): 631 632 def decompile(self, data, ttFont): 633 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 634 # If not, someone is calling the subtable decompile() directly, and must provide both args. 635 if data is not None and ttFont is not None: 636 self.decompileHeader(self.data[offset:offset+int(length)], ttFont) 637 else: 638 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 639 640 data = self.data # decompileHeader assigns the data after the header to self.data 641 (segCountX2, searchRange, entrySelector, rangeShift) = \ 642 struct.unpack(">4H", data[:8]) 643 data = data[8:] 644 segCount = segCountX2 // 2 645 646 allCodes = array.array("H") 647 allCodes.fromstring(data) 648 self.data = data = None 649 650 if sys.byteorder != "big": 651 allCodes.byteswap() 652 653 # divide the data 654 endCode = allCodes[:segCount] 655 allCodes = allCodes[segCount+1:] # the +1 is skipping the reservedPad field 656 startCode = allCodes[:segCount] 657 allCodes = allCodes[segCount:] 658 idDelta = allCodes[:segCount] 659 allCodes = allCodes[segCount:] 660 idRangeOffset = allCodes[:segCount] 661 glyphIndexArray = allCodes[segCount:] 662 lenGIArray = len(glyphIndexArray) 663 664 # build 2-byte character mapping 665 charCodes = [] 666 gids = [] 667 for i in range(len(startCode) - 1): # don't do 0xffff! 668 rangeCharCodes = list(range(startCode[i], endCode[i] + 1)) 669 charCodes.extend(rangeCharCodes) 670 for charCode in rangeCharCodes: 671 rangeOffset = idRangeOffset[i] 672 if rangeOffset == 0: 673 glyphID = charCode + idDelta[i] 674 else: 675 # *someone* needs to get killed. 676 index = rangeOffset // 2 + (charCode - startCode[i]) + i - len(idRangeOffset) 677 assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray) 678 if glyphIndexArray[index] != 0: # if not missing glyph 679 glyphID = glyphIndexArray[index] + idDelta[i] 680 else: 681 glyphID = 0 # missing glyph 682 gids.append(glyphID % 0x10000) 683 684 self.cmap = cmap = {} 685 lenCmap = len(gids) 686 glyphOrder = self.ttFont.getGlyphOrder() 687 try: 688 names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids )) 689 except IndexError: 690 getGlyphName = self.ttFont.getGlyphName 691 names = list(map(getGlyphName, gids )) 692 list(map(operator.setitem, [cmap]*lenCmap, charCodes, names)) 693 694 695 696 def setIDDelta(self, idDelta): 697 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 698 # idDelta is a short, and must be between -32K and 32K 699 # startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1 700 # This means that we have a problem because we can need to assign to idDelta values 701 # between -(64K-2) and 64K -1. 702 # Since the final gi is reconstructed from the glyphArray GID by: 703 # (short)finalGID = (gid + idDelta) % 0x10000), 704 # we can get from a startCode of 0 to a final GID of 64 -1K by subtracting 1, and casting the 705 # negative number to an unsigned short. 706 # Similarly , we can get from a startCode of 64K-1 to a final GID of 1 by adding 2, because of 707 # the modulo arithmetic. 708 709 if idDelta > 0x7FFF: 710 idDelta = idDelta - 0x10000 711 elif idDelta < -0x7FFF: 712 idDelta = idDelta + 0x10000 713 714 return idDelta 715 716 717 def compile(self, ttFont): 718 if self.data: 719 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 720 721 from fontTools.ttLib.sfnt import maxPowerOfTwo 722 723 charCodes = list(self.cmap.keys()) 724 lenCharCodes = len(charCodes) 725 if lenCharCodes == 0: 726 startCode = [0xffff] 727 endCode = [0xffff] 728 else: 729 charCodes.sort() 730 names = list(map(operator.getitem, [self.cmap]*lenCharCodes, charCodes)) 731 nameMap = ttFont.getReverseGlyphMap() 732 try: 733 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 734 except KeyError: 735 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 736 try: 737 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 738 except KeyError: 739 # allow virtual GIDs in format 4 tables 740 gids = [] 741 for name in names: 742 try: 743 gid = nameMap[name] 744 except KeyError: 745 try: 746 if (name[:3] == 'gid'): 747 gid = eval(name[3:]) 748 else: 749 gid = ttFont.getGlyphID(name) 750 except: 751 raise KeyError(name) 752 753 gids.append(gid) 754 cmap = {} # code:glyphID mapping 755 list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids)) 756 757 # Build startCode and endCode lists. 758 # Split the char codes in ranges of consecutive char codes, then split 759 # each range in more ranges of consecutive/not consecutive glyph IDs. 760 # See splitRange(). 761 lastCode = charCodes[0] 762 endCode = [] 763 startCode = [lastCode] 764 for charCode in charCodes[1:]: # skip the first code, it's the first start code 765 if charCode == lastCode + 1: 766 lastCode = charCode 767 continue 768 start, end = splitRange(startCode[-1], lastCode, cmap) 769 startCode.extend(start) 770 endCode.extend(end) 771 startCode.append(charCode) 772 lastCode = charCode 773 endCode.append(lastCode) 774 startCode.append(0xffff) 775 endCode.append(0xffff) 776 777 # build up rest of cruft 778 idDelta = [] 779 idRangeOffset = [] 780 glyphIndexArray = [] 781 for i in range(len(endCode)-1): # skip the closing codes (0xffff) 782 indices = [] 783 for charCode in range(startCode[i], endCode[i] + 1): 784 indices.append(cmap[charCode]) 785 if (indices == list(range(indices[0], indices[0] + len(indices)))): 786 idDeltaTemp = self.setIDDelta(indices[0] - startCode[i]) 787 idDelta.append( idDeltaTemp) 788 idRangeOffset.append(0) 789 else: 790 # someone *definitely* needs to get killed. 791 idDelta.append(0) 792 idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i)) 793 glyphIndexArray.extend(indices) 794 idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef 795 idRangeOffset.append(0) 796 797 # Insane. 798 segCount = len(endCode) 799 segCountX2 = segCount * 2 800 maxExponent = maxPowerOfTwo(segCount) 801 searchRange = 2 * (2 ** maxExponent) 802 entrySelector = maxExponent 803 rangeShift = 2 * segCount - searchRange 804 805 charCodeArray = array.array("H", endCode + [0] + startCode) 806 idDeltaeArray = array.array("h", idDelta) 807 restArray = array.array("H", idRangeOffset + glyphIndexArray) 808 if sys.byteorder != "big": 809 charCodeArray.byteswap() 810 idDeltaeArray.byteswap() 811 restArray.byteswap() 812 data = charCodeArray.tostring() + idDeltaeArray.tostring() + restArray.tostring() 813 814 length = struct.calcsize(cmap_format_4_format) + len(data) 815 header = struct.pack(cmap_format_4_format, self.format, length, self.language, 816 segCountX2, searchRange, entrySelector, rangeShift) 817 return header + data 818 819 def fromXML(self, name, attrs, content, ttFont): 820 self.language = safeEval(attrs["language"]) 821 if not hasattr(self, "cmap"): 822 self.cmap = {} 823 cmap = self.cmap 824 825 for element in content: 826 if not isinstance(element, tuple): 827 continue 828 nameMap, attrsMap, dummyContent = element 829 if nameMap != "map": 830 assert 0, "Unrecognized keyword in cmap subtable" 831 cmap[safeEval(attrsMap["code"])] = attrsMap["name"] 832 833 834class cmap_format_6(CmapSubtable): 835 836 def decompile(self, data, ttFont): 837 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 838 # If not, someone is calling the subtable decompile() directly, and must provide both args. 839 if data is not None and ttFont is not None: 840 self.decompileHeader(data[offset:offset+int(length)], ttFont) 841 else: 842 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 843 844 data = self.data # decompileHeader assigns the data after the header to self.data 845 firstCode, entryCount = struct.unpack(">HH", data[:4]) 846 firstCode = int(firstCode) 847 data = data[4:] 848 #assert len(data) == 2 * entryCount # XXX not true in Apple's Helvetica!!! 849 glyphIndexArray = array.array("H") 850 glyphIndexArray.fromstring(data[:2 * int(entryCount)]) 851 if sys.byteorder != "big": 852 glyphIndexArray.byteswap() 853 self.data = data = None 854 855 self.cmap = cmap = {} 856 857 lenArray = len(glyphIndexArray) 858 charCodes = list(range(firstCode, firstCode + lenArray)) 859 glyphOrder = self.ttFont.getGlyphOrder() 860 try: 861 names = list(map(operator.getitem, [glyphOrder]*lenArray, glyphIndexArray )) 862 except IndexError: 863 getGlyphName = self.ttFont.getGlyphName 864 names = list(map(getGlyphName, glyphIndexArray )) 865 list(map(operator.setitem, [cmap]*lenArray, charCodes, names)) 866 867 def compile(self, ttFont): 868 if self.data: 869 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 870 cmap = self.cmap 871 codes = list(cmap.keys()) 872 if codes: # yes, there are empty cmap tables. 873 codes = list(range(codes[0], codes[-1] + 1)) 874 firstCode = codes[0] 875 valueList = [cmap.get(code, ".notdef") for code in codes] 876 valueList = map(ttFont.getGlyphID, valueList) 877 glyphIndexArray = array.array("H", valueList) 878 if sys.byteorder != "big": 879 glyphIndexArray.byteswap() 880 data = glyphIndexArray.tostring() 881 else: 882 data = b"" 883 firstCode = 0 884 header = struct.pack(">HHHHH", 885 6, len(data) + 10, self.language, firstCode, len(codes)) 886 return header + data 887 888 def fromXML(self, name, attrs, content, ttFont): 889 self.language = safeEval(attrs["language"]) 890 if not hasattr(self, "cmap"): 891 self.cmap = {} 892 cmap = self.cmap 893 894 for element in content: 895 if not isinstance(element, tuple): 896 continue 897 name, attrs, content = element 898 if name != "map": 899 continue 900 cmap[safeEval(attrs["code"])] = attrs["name"] 901 902 903class cmap_format_12_or_13(CmapSubtable): 904 905 def __init__(self, format): 906 self.format = format 907 self.reserved = 0 908 self.data = None 909 self.ttFont = None 910 911 def decompileHeader(self, data, ttFont): 912 format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16]) 913 assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length) 914 self.format = format 915 self.reserved = reserved 916 self.length = length 917 self.language = language 918 self.nGroups = nGroups 919 self.data = data[16:] 920 self.ttFont = ttFont 921 922 def decompile(self, data, ttFont): 923 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 924 # If not, someone is calling the subtable decompile() directly, and must provide both args. 925 if data is not None and ttFont is not None: 926 self.decompileHeader(data[offset:offset+int(length)], ttFont) 927 else: 928 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 929 930 data = self.data # decompileHeader assigns the data after the header to self.data 931 charCodes = [] 932 gids = [] 933 pos = 0 934 for i in range(self.nGroups): 935 startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] ) 936 pos += 12 937 lenGroup = 1 + endCharCode - startCharCode 938 charCodes += list(range(startCharCode, endCharCode +1)) 939 gids += self._computeGIDs(glyphID, lenGroup) 940 self.data = data = None 941 self.cmap = cmap = {} 942 lenCmap = len(gids) 943 glyphOrder = self.ttFont.getGlyphOrder() 944 try: 945 names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids )) 946 except IndexError: 947 getGlyphName = self.ttFont.getGlyphName 948 names = list(map(getGlyphName, gids )) 949 list(map(operator.setitem, [cmap]*lenCmap, charCodes, names)) 950 951 def compile(self, ttFont): 952 if self.data: 953 return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data 954 charCodes = list(self.cmap.keys()) 955 lenCharCodes = len(charCodes) 956 names = list(self.cmap.values()) 957 nameMap = ttFont.getReverseGlyphMap() 958 try: 959 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 960 except KeyError: 961 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 962 try: 963 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 964 except KeyError: 965 # allow virtual GIDs in format 12 tables 966 gids = [] 967 for name in names: 968 try: 969 gid = nameMap[name] 970 except KeyError: 971 try: 972 if (name[:3] == 'gid'): 973 gid = eval(name[3:]) 974 else: 975 gid = ttFont.getGlyphID(name) 976 except: 977 raise KeyError(name) 978 979 gids.append(gid) 980 981 cmap = {} # code:glyphID mapping 982 list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids)) 983 984 charCodes.sort() 985 index = 0 986 startCharCode = charCodes[0] 987 startGlyphID = cmap[startCharCode] 988 lastGlyphID = startGlyphID - self._format_step 989 lastCharCode = startCharCode - 1 990 nGroups = 0 991 dataList = [] 992 maxIndex = len(charCodes) 993 for index in range(maxIndex): 994 charCode = charCodes[index] 995 glyphID = cmap[charCode] 996 if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode): 997 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 998 startCharCode = charCode 999 startGlyphID = glyphID 1000 nGroups = nGroups + 1 1001 lastGlyphID = glyphID 1002 lastCharCode = charCode 1003 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 1004 nGroups = nGroups + 1 1005 data = bytesjoin(dataList) 1006 lengthSubtable = len(data) +16 1007 assert len(data) == (nGroups*12) == (lengthSubtable-16) 1008 return struct.pack(">HHLLL", self.format, self.reserved , lengthSubtable, self.language, nGroups) + data 1009 1010 def toXML(self, writer, ttFont): 1011 writer.begintag(self.__class__.__name__, [ 1012 ("platformID", self.platformID), 1013 ("platEncID", self.platEncID), 1014 ("format", self.format), 1015 ("reserved", self.reserved), 1016 ("length", self.length), 1017 ("language", self.language), 1018 ("nGroups", self.nGroups), 1019 ]) 1020 writer.newline() 1021 codes = sorted(self.cmap.items()) 1022 self._writeCodes(codes, writer) 1023 writer.endtag(self.__class__.__name__) 1024 writer.newline() 1025 1026 def fromXML(self, name, attrs, content, ttFont): 1027 self.format = safeEval(attrs["format"]) 1028 self.reserved = safeEval(attrs["reserved"]) 1029 self.length = safeEval(attrs["length"]) 1030 self.language = safeEval(attrs["language"]) 1031 self.nGroups = safeEval(attrs["nGroups"]) 1032 if not hasattr(self, "cmap"): 1033 self.cmap = {} 1034 cmap = self.cmap 1035 1036 for element in content: 1037 if not isinstance(element, tuple): 1038 continue 1039 name, attrs, content = element 1040 if name != "map": 1041 continue 1042 cmap[safeEval(attrs["code"])] = attrs["name"] 1043 1044 1045class cmap_format_12(cmap_format_12_or_13): 1046 def __init__(self, format): 1047 cmap_format_12_or_13.__init__(self, format) 1048 self._format_step = 1 1049 1050 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1051 return list(range(startingGlyph, startingGlyph + numberOfGlyphs)) 1052 1053 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1054 return (glyphID == 1 + lastGlyphID) and (charCode == 1 + lastCharCode) 1055 1056 1057class cmap_format_13(cmap_format_12_or_13): 1058 def __init__(self, format): 1059 cmap_format_12_or_13.__init__(self, format) 1060 self._format_step = 0 1061 1062 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1063 return [startingGlyph] * numberOfGlyphs 1064 1065 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1066 return (glyphID == lastGlyphID) and (charCode == 1 + lastCharCode) 1067 1068 1069def cvtToUVS(threeByteString): 1070 data = b"\0" + threeByteString 1071 val, = struct.unpack(">L", data) 1072 return val 1073 1074def cvtFromUVS(val): 1075 assert 0 <= val < 0x1000000 1076 fourByteString = struct.pack(">L", val) 1077 return fourByteString[1:] 1078 1079 1080class cmap_format_14(CmapSubtable): 1081 1082 def decompileHeader(self, data, ttFont): 1083 format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10]) 1084 self.data = data[10:] 1085 self.length = length 1086 self.numVarSelectorRecords = numVarSelectorRecords 1087 self.ttFont = ttFont 1088 self.language = 0xFF # has no language. 1089 1090 def decompile(self, data, ttFont): 1091 if data is not None and ttFont is not None and ttFont.lazy: 1092 self.decompileHeader(data, ttFont) 1093 else: 1094 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 1095 data = self.data 1096 1097 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail. 1098 uvsDict = {} 1099 recOffset = 0 1100 for n in range(self.numVarSelectorRecords): 1101 uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11]) 1102 recOffset += 11 1103 varUVS = cvtToUVS(uvs) 1104 if defOVSOffset: 1105 startOffset = defOVSOffset - 10 1106 numValues, = struct.unpack(">L", data[startOffset:startOffset+4]) 1107 startOffset +=4 1108 for r in range(numValues): 1109 uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4]) 1110 startOffset += 4 1111 firstBaseUV = cvtToUVS(uv) 1112 cnt = addtlCnt+1 1113 baseUVList = list(range(firstBaseUV, firstBaseUV+cnt)) 1114 glyphList = [None]*cnt 1115 localUVList = zip(baseUVList, glyphList) 1116 try: 1117 uvsDict[varUVS].extend(localUVList) 1118 except KeyError: 1119 uvsDict[varUVS] = list(localUVList) 1120 1121 if nonDefUVSOffset: 1122 startOffset = nonDefUVSOffset - 10 1123 numRecs, = struct.unpack(">L", data[startOffset:startOffset+4]) 1124 startOffset +=4 1125 localUVList = [] 1126 for r in range(numRecs): 1127 uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5]) 1128 startOffset += 5 1129 uv = cvtToUVS(uv) 1130 glyphName = self.ttFont.getGlyphName(gid) 1131 localUVList.append( [uv, glyphName] ) 1132 try: 1133 uvsDict[varUVS].extend(localUVList) 1134 except KeyError: 1135 uvsDict[varUVS] = localUVList 1136 1137 self.uvsDict = uvsDict 1138 1139 def toXML(self, writer, ttFont): 1140 writer.begintag(self.__class__.__name__, [ 1141 ("platformID", self.platformID), 1142 ("platEncID", self.platEncID), 1143 ("format", self.format), 1144 ("length", self.length), 1145 ("numVarSelectorRecords", self.numVarSelectorRecords), 1146 ]) 1147 writer.newline() 1148 uvsDict = self.uvsDict 1149 uvsList = sorted(uvsDict.keys()) 1150 for uvs in uvsList: 1151 uvList = uvsDict[uvs] 1152 uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1])) 1153 for uv, gname in uvList: 1154 if gname is None: 1155 gname = "None" 1156 # I use the arg rather than th keyword syntax in order to preserve the attribute order. 1157 writer.simpletag("map", [ ("uvs",hex(uvs)), ("uv",hex(uv)), ("name", gname)] ) 1158 writer.newline() 1159 writer.endtag(self.__class__.__name__) 1160 writer.newline() 1161 1162 def fromXML(self, name, attrs, content, ttFont): 1163 self.format = safeEval(attrs["format"]) 1164 self.length = safeEval(attrs["length"]) 1165 self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"]) 1166 self.language = 0xFF # provide a value so that CmapSubtable.__lt__() won't fail 1167 if not hasattr(self, "cmap"): 1168 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail. 1169 if not hasattr(self, "uvsDict"): 1170 self.uvsDict = {} 1171 uvsDict = self.uvsDict 1172 1173 for element in content: 1174 if not isinstance(element, tuple): 1175 continue 1176 name, attrs, content = element 1177 if name != "map": 1178 continue 1179 uvs = safeEval(attrs["uvs"]) 1180 uv = safeEval(attrs["uv"]) 1181 gname = attrs["name"] 1182 if gname == "None": 1183 gname = None 1184 try: 1185 uvsDict[uvs].append( [uv, gname]) 1186 except KeyError: 1187 uvsDict[uvs] = [ [uv, gname] ] 1188 1189 1190 def compile(self, ttFont): 1191 if self.data: 1192 return struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) + self.data 1193 1194 uvsDict = self.uvsDict 1195 uvsList = sorted(uvsDict.keys()) 1196 self.numVarSelectorRecords = len(uvsList) 1197 offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block. 1198 data = [] 1199 varSelectorRecords =[] 1200 for uvs in uvsList: 1201 entryList = uvsDict[uvs] 1202 1203 defList = [entry for entry in entryList if entry[1] is None] 1204 if defList: 1205 defList = [entry[0] for entry in defList] 1206 defOVSOffset = offset 1207 defList.sort() 1208 1209 lastUV = defList[0] 1210 cnt = -1 1211 defRecs = [] 1212 for defEntry in defList: 1213 cnt +=1 1214 if (lastUV+cnt) != defEntry: 1215 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1) 1216 lastUV = defEntry 1217 defRecs.append(rec) 1218 cnt = 0 1219 1220 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt) 1221 defRecs.append(rec) 1222 1223 numDefRecs = len(defRecs) 1224 data.append(struct.pack(">L", numDefRecs)) 1225 data.extend(defRecs) 1226 offset += 4 + numDefRecs*4 1227 else: 1228 defOVSOffset = 0 1229 1230 ndefList = [entry for entry in entryList if entry[1] is not None] 1231 if ndefList: 1232 nonDefUVSOffset = offset 1233 ndefList.sort() 1234 numNonDefRecs = len(ndefList) 1235 data.append(struct.pack(">L", numNonDefRecs)) 1236 offset += 4 + numNonDefRecs*5 1237 1238 for uv, gname in ndefList: 1239 gid = ttFont.getGlyphID(gname) 1240 ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid) 1241 data.append(ndrec) 1242 else: 1243 nonDefUVSOffset = 0 1244 1245 vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset) 1246 varSelectorRecords.append(vrec) 1247 1248 data = bytesjoin(varSelectorRecords) + bytesjoin(data) 1249 self.length = 10 + len(data) 1250 headerdata = struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) 1251 self.data = headerdata + data 1252 1253 return self.data 1254 1255 1256class cmap_format_unknown(CmapSubtable): 1257 1258 def toXML(self, writer, ttFont): 1259 cmapName = self.__class__.__name__[:12] + str(self.format) 1260 writer.begintag(cmapName, [ 1261 ("platformID", self.platformID), 1262 ("platEncID", self.platEncID), 1263 ]) 1264 writer.newline() 1265 writer.dumphex(self.data) 1266 writer.endtag(cmapName) 1267 writer.newline() 1268 1269 def fromXML(self, name, attrs, content, ttFont): 1270 self.data = readHex(content) 1271 self.cmap = {} 1272 1273 def decompileHeader(self, data, ttFont): 1274 self.language = 0 # dummy value 1275 self.data = data 1276 1277 def decompile(self, data, ttFont): 1278 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 1279 # If not, someone is calling the subtable decompile() directly, and must provide both args. 1280 if data is not None and ttFont is not None: 1281 self.decompileHeader(data[offset:offset+int(length)], ttFont) 1282 else: 1283 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 1284 1285 def compile(self, ttFont): 1286 if self.data: 1287 return self.data 1288 else: 1289 return None 1290 1291cmap_classes = { 1292 0: cmap_format_0, 1293 2: cmap_format_2, 1294 4: cmap_format_4, 1295 6: cmap_format_6, 1296 12: cmap_format_12, 1297 13: cmap_format_13, 1298 14: cmap_format_14, 1299 } 1300