_c_m_a_p.py revision 3ec6a258238b6068e4eef3fe579f1f5c0a06bbba
1import sys 2from . import DefaultTable 3import struct 4import array 5import operator 6from fontTools import ttLib 7from fontTools.misc.textTools import safeEval, readHex 8from types import TupleType 9 10 11class table__c_m_a_p(DefaultTable.DefaultTable): 12 13 def getcmap(self, platformID, platEncID): 14 for subtable in self.tables: 15 if (subtable.platformID == platformID and 16 subtable.platEncID == platEncID): 17 return subtable 18 return None # not found 19 20 def decompile(self, data, ttFont): 21 tableVersion, numSubTables = struct.unpack(">HH", data[:4]) 22 self.tableVersion = int(tableVersion) 23 self.tables = tables = [] 24 seenOffsets = {} 25 for i in range(numSubTables): 26 platformID, platEncID, offset = struct.unpack( 27 ">HHl", data[4+i*8:4+(i+1)*8]) 28 platformID, platEncID = int(platformID), int(platEncID) 29 format, length = struct.unpack(">HH", data[offset:offset+4]) 30 if format in [8,10,12,13]: 31 format, reserved, length = struct.unpack(">HHL", data[offset:offset+8]) 32 elif format in [14]: 33 format, length = struct.unpack(">HL", data[offset:offset+6]) 34 35 if not length: 36 print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s, format %s offset %s. Skipping table." % (platformID, platEncID,format, offset)) 37 continue 38 if format not in cmap_classes: 39 table = cmap_format_unknown(format) 40 else: 41 table = cmap_classes[format](format) 42 table.platformID = platformID 43 table.platEncID = platEncID 44 # Note that by default we decompile only the subtable header info; 45 # any other data gets decompiled only when an attribute of the 46 # subtable is referenced. 47 table.decompileHeader(data[offset:offset+int(length)], ttFont) 48 if offset in seenOffsets: 49 table.cmap = tables[seenOffsets[offset]].cmap 50 else: 51 seenOffsets[offset] = i 52 tables.append(table) 53 54 def compile(self, ttFont): 55 self.tables.sort() # sort according to the spec; see CmapSubtable.__cmp__() 56 numSubTables = len(self.tables) 57 totalOffset = 4 + 8 * numSubTables 58 data = struct.pack(">HH", self.tableVersion, numSubTables) 59 tableData = "" 60 seen = {} # Some tables are the same object reference. Don't compile them twice. 61 done = {} # Some tables are different objects, but compile to the same data chunk 62 for table in self.tables: 63 try: 64 offset = seen[id(table.cmap)] 65 except KeyError: 66 chunk = table.compile(ttFont) 67 if chunk in done: 68 offset = done[chunk] 69 else: 70 offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData) 71 tableData = tableData + chunk 72 data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset) 73 return data + tableData 74 75 def toXML(self, writer, ttFont): 76 writer.simpletag("tableVersion", version=self.tableVersion) 77 writer.newline() 78 for table in self.tables: 79 table.toXML(writer, ttFont) 80 81 def fromXML(self, name, attrs, content, ttFont): 82 if name == "tableVersion": 83 self.tableVersion = safeEval(attrs["version"]) 84 return 85 if name[:12] != "cmap_format_": 86 return 87 if not hasattr(self, "tables"): 88 self.tables = [] 89 format = safeEval(name[12:]) 90 if format not in cmap_classes: 91 table = cmap_format_unknown(format) 92 else: 93 table = cmap_classes[format](format) 94 table.platformID = safeEval(attrs["platformID"]) 95 table.platEncID = safeEval(attrs["platEncID"]) 96 table.fromXML(name, attrs, content, ttFont) 97 self.tables.append(table) 98 99 100class CmapSubtable: 101 102 def __init__(self, format): 103 self.format = format 104 self.data = None 105 self.ttFont = None 106 107 def __getattr__(self, attr): 108 # allow lazy decompilation of subtables. 109 if attr[:2] == '__': # don't handle requests for member functions like '__lt__' 110 raise AttributeError(attr) 111 if self.data == None: 112 raise AttributeError(attr) 113 self.decompile(None, None) # use saved data. 114 self.data = None # Once this table has been decompiled, make sure we don't 115 # just return the original data. Also avoids recursion when 116 # called with an attribute that the cmap subtable doesn't have. 117 return getattr(self, attr) 118 119 def decompileHeader(self, data, ttFont): 120 format, length, language = struct.unpack(">HHH", data[:6]) 121 assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length) 122 self.format = int(format) 123 self.length = int(length) 124 self.language = int(language) 125 self.data = data[6:] 126 self.ttFont = ttFont 127 128 def toXML(self, writer, ttFont): 129 writer.begintag(self.__class__.__name__, [ 130 ("platformID", self.platformID), 131 ("platEncID", self.platEncID), 132 ("language", self.language), 133 ]) 134 writer.newline() 135 codes = sorted(self.cmap.items()) 136 self._writeCodes(codes, writer) 137 writer.endtag(self.__class__.__name__) 138 writer.newline() 139 140 def _writeCodes(self, codes, writer): 141 if (self.platformID, self.platEncID) == (3, 1) or (self.platformID, self.platEncID) == (3, 10) or self.platformID == 0: 142 from fontTools.unicode import Unicode 143 isUnicode = 1 144 else: 145 isUnicode = 0 146 for code, name in codes: 147 writer.simpletag("map", code=hex(code), name=name) 148 if isUnicode: 149 writer.comment(Unicode[code]) 150 writer.newline() 151 152 def __cmp__(self, other): 153 if not isinstance(self, type(other)): return cmp(type(self), type(other)) 154 155 # implemented so that list.sort() sorts according to the cmap spec. 156 selfTuple = ( 157 getattr(self, "platformID", None), 158 getattr(self, "platEncID", None), 159 getattr(self, "language", None), 160 self.__dict__) 161 otherTuple = ( 162 getattr(other, "platformID", None), 163 getattr(other, "platEncID", None), 164 getattr(other, "language", None), 165 other.__dict__) 166 return cmp(selfTuple, otherTuple) 167 168 169class cmap_format_0(CmapSubtable): 170 171 def decompile(self, data, ttFont): 172 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 173 # If not, someone is calling the subtable decompile() directly, and must provide both args. 174 if data != None and ttFont != None: 175 self.decompileHeader(data[offset:offset+int(length)], ttFont) 176 else: 177 assert (data == None and ttFont == None), "Need both data and ttFont arguments" 178 data = self.data # decompileHeader assigns the data after the header to self.data 179 assert 262 == self.length, "Format 0 cmap subtable not 262 bytes" 180 glyphIdArray = array.array("B") 181 glyphIdArray.fromstring(self.data) 182 self.cmap = cmap = {} 183 lenArray = len(glyphIdArray) 184 charCodes = list(range(lenArray)) 185 names = map(self.ttFont.getGlyphName, glyphIdArray) 186 list(map(operator.setitem, [cmap]*lenArray, charCodes, names)) 187 188 189 def compile(self, ttFont): 190 if self.data: 191 return struct.pack(">HHH", 0, 262, self.language) + self.data 192 193 charCodeList = sorted(self.cmap.items()) 194 charCodes = [entry[0] for entry in charCodeList] 195 valueList = [entry[1] for entry in charCodeList] 196 assert charCodes == list(range(256)) 197 valueList = map(ttFont.getGlyphID, valueList) 198 199 glyphIdArray = array.array("B", valueList) 200 data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring() 201 assert len(data) == 262 202 return data 203 204 def fromXML(self, name, attrs, content, ttFont): 205 self.language = safeEval(attrs["language"]) 206 if not hasattr(self, "cmap"): 207 self.cmap = {} 208 cmap = self.cmap 209 for element in content: 210 if not isinstance(element, TupleType): 211 continue 212 name, attrs, content = element 213 if name != "map": 214 continue 215 cmap[safeEval(attrs["code"])] = attrs["name"] 216 217 218subHeaderFormat = ">HHhH" 219class SubHeader: 220 def __init__(self): 221 self.firstCode = None 222 self.entryCount = None 223 self.idDelta = None 224 self.idRangeOffset = None 225 self.glyphIndexArray = [] 226 227class cmap_format_2(CmapSubtable): 228 229 def setIDDelta(self, subHeader): 230 subHeader.idDelta = 0 231 # find the minGI which is not zero. 232 minGI = subHeader.glyphIndexArray[0] 233 for gid in subHeader.glyphIndexArray: 234 if (gid != 0) and (gid < minGI): 235 minGI = gid 236 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 237 # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K. 238 # We would like to pick an idDelta such that the first glyphArray GID is 1, 239 # so that we are more likely to be able to combine glypharray GID subranges. 240 # This means that we have a problem when minGI is > 32K 241 # Since the final gi is reconstructed from the glyphArray GID by: 242 # (short)finalGID = (gid + idDelta) % 0x10000), 243 # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the 244 # negative number to an unsigned short. 245 246 if (minGI > 1): 247 if minGI > 0x7FFF: 248 subHeader.idDelta = -(0x10000 - minGI) -1 249 else: 250 subHeader.idDelta = minGI -1 251 idDelta = subHeader.idDelta 252 for i in range(subHeader.entryCount): 253 gid = subHeader.glyphIndexArray[i] 254 if gid > 0: 255 subHeader.glyphIndexArray[i] = gid - idDelta 256 257 258 def decompile(self, data, ttFont): 259 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 260 # If not, someone is calling the subtable decompile() directly, and must provide both args. 261 if data != None and ttFont != None: 262 self.decompileHeader(data[offset:offset+int(length)], ttFont) 263 else: 264 assert (data == None and ttFont == None), "Need both data and ttFont arguments" 265 266 data = self.data # decompileHeader assigns the data after the header to self.data 267 subHeaderKeys = [] 268 maxSubHeaderindex = 0 269 # get the key array, and determine the number of subHeaders. 270 allKeys = array.array("H") 271 allKeys.fromstring(data[:512]) 272 data = data[512:] 273 if sys.byteorder != "big": 274 allKeys.byteswap() 275 subHeaderKeys = [ key/8 for key in allKeys] 276 maxSubHeaderindex = max(subHeaderKeys) 277 278 #Load subHeaders 279 subHeaderList = [] 280 pos = 0 281 for i in range(maxSubHeaderindex + 1): 282 subHeader = SubHeader() 283 (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \ 284 subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8]) 285 pos += 8 286 giDataPos = pos + subHeader.idRangeOffset-2 287 giList = array.array("H") 288 giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount*2]) 289 if sys.byteorder != "big": 290 giList.byteswap() 291 subHeader.glyphIndexArray = giList 292 subHeaderList.append(subHeader) 293 # How this gets processed. 294 # Charcodes may be one or two bytes. 295 # The first byte of a charcode is mapped through the subHeaderKeys, to select 296 # a subHeader. For any subheader but 0, the next byte is then mapped through the 297 # selected subheader. If subheader Index 0 is selected, then the byte itself is 298 # mapped through the subheader, and there is no second byte. 299 # Then assume that the subsequent byte is the first byte of the next charcode,and repeat. 300 # 301 # Each subheader references a range in the glyphIndexArray whose length is entryCount. 302 # The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray 303 # referenced by another subheader. 304 # The only subheader that will be referenced by more than one first-byte value is the subheader 305 # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef: 306 # {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx} 307 # A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex. 308 # A subheader specifies a subrange within (0...256) by the 309 # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero 310 # (e.g. glyph not in font). 311 # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar). 312 # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by 313 # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the 314 # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex. 315 # Example for Logocut-Medium 316 # first byte of charcode = 129; selects subheader 1. 317 # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252} 318 # second byte of charCode = 66 319 # the index offset = 66-64 = 2. 320 # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is: 321 # [glyphIndexArray index], [subrange array index] = glyphIndex 322 # [256], [0]=1 from charcode [129, 64] 323 # [257], [1]=2 from charcode [129, 65] 324 # [258], [2]=3 from charcode [129, 66] 325 # [259], [3]=4 from charcode [129, 67] 326 # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero, 327 # add it to the glyphID to get the final glyphIndex 328 # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew! 329 330 self.data = "" 331 self.cmap = cmap = {} 332 notdefGI = 0 333 for firstByte in range(256): 334 subHeadindex = subHeaderKeys[firstByte] 335 subHeader = subHeaderList[subHeadindex] 336 if subHeadindex == 0: 337 if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount): 338 continue # gi is notdef. 339 else: 340 charCode = firstByte 341 offsetIndex = firstByte - subHeader.firstCode 342 gi = subHeader.glyphIndexArray[offsetIndex] 343 if gi != 0: 344 gi = (gi + subHeader.idDelta) % 0x10000 345 else: 346 continue # gi is notdef. 347 cmap[charCode] = gi 348 else: 349 if subHeader.entryCount: 350 charCodeOffset = firstByte * 256 + subHeader.firstCode 351 for offsetIndex in range(subHeader.entryCount): 352 charCode = charCodeOffset + offsetIndex 353 gi = subHeader.glyphIndexArray[offsetIndex] 354 if gi != 0: 355 gi = (gi + subHeader.idDelta) % 0x10000 356 else: 357 continue 358 cmap[charCode] = gi 359 # If not subHeader.entryCount, then all char codes with this first byte are 360 # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the 361 # same as mapping it to .notdef. 362 # cmap values are GID's. 363 glyphOrder = self.ttFont.getGlyphOrder() 364 gids = cmap.values() 365 charCodes = cmap.keys() 366 lenCmap = len(gids) 367 try: 368 names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids )) 369 except IndexError: 370 getGlyphName = self.ttFont.getGlyphName 371 names = list(map(getGlyphName, gids )) 372 list(map(operator.setitem, [cmap]*lenCmap, charCodes, names)) 373 374 375 def compile(self, ttFont): 376 if self.data: 377 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 378 kEmptyTwoCharCodeRange = -1 379 notdefGI = 0 380 381 items = sorted(self.cmap.items()) 382 charCodes = [item[0] for item in items] 383 names = [item[1] for item in items] 384 nameMap = ttFont.getReverseGlyphMap() 385 lenCharCodes = len(charCodes) 386 try: 387 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 388 except KeyError: 389 nameMap = ttFont.getReverseGlyphMap(rebuild=1) 390 try: 391 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 392 except KeyError: 393 # allow virtual GIDs in format 2 tables 394 gids = [] 395 for name in names: 396 try: 397 gid = nameMap[name] 398 except KeyError: 399 try: 400 if (name[:3] == 'gid'): 401 gid = eval(name[3:]) 402 else: 403 gid = ttFont.getGlyphID(name) 404 except: 405 raise KeyError(name) 406 407 gids.append(gid) 408 409 # Process the (char code to gid) item list in char code order. 410 # By definition, all one byte char codes map to subheader 0. 411 # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0, 412 # which defines all char codes in its range to map to notdef) unless proven otherwise. 413 # Note that since the char code items are processed in char code order, all the char codes with the 414 # same first byte are in sequential order. 415 416 subHeaderKeys = [ kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList. 417 subHeaderList = [] 418 419 # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up 420 # with a cmap where all the one byte char codes map to notdef, 421 # with the result that the subhead 0 would not get created just by processing the item list. 422 charCode = charCodes[0] 423 if charCode > 255: 424 subHeader = SubHeader() 425 subHeader.firstCode = 0 426 subHeader.entryCount = 0 427 subHeader.idDelta = 0 428 subHeader.idRangeOffset = 0 429 subHeaderList.append(subHeader) 430 431 432 lastFirstByte = -1 433 items = zip(charCodes, gids) 434 for charCode, gid in items: 435 if gid == 0: 436 continue 437 firstbyte = charCode >> 8 438 secondByte = charCode & 0x00FF 439 440 if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one. 441 if lastFirstByte > -1: 442 # fix GI's and iDelta of current subheader. 443 self.setIDDelta(subHeader) 444 445 # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero 446 # for the indices matching the char codes. 447 if lastFirstByte == 0: 448 for index in range(subHeader.entryCount): 449 charCode = subHeader.firstCode + index 450 subHeaderKeys[charCode] = 0 451 452 assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange." 453 # init new subheader 454 subHeader = SubHeader() 455 subHeader.firstCode = secondByte 456 subHeader.entryCount = 1 457 subHeader.glyphIndexArray.append(gid) 458 subHeaderList.append(subHeader) 459 subHeaderKeys[firstbyte] = len(subHeaderList) -1 460 lastFirstByte = firstbyte 461 else: 462 # need to fill in with notdefs all the code points between the last charCode and the current charCode. 463 codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount) 464 for i in range(codeDiff): 465 subHeader.glyphIndexArray.append(notdefGI) 466 subHeader.glyphIndexArray.append(gid) 467 subHeader.entryCount = subHeader.entryCount + codeDiff + 1 468 469 # fix GI's and iDelta of last subheader that we we added to the subheader array. 470 self.setIDDelta(subHeader) 471 472 # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges. 473 subHeader = SubHeader() 474 subHeader.firstCode = 0 475 subHeader.entryCount = 0 476 subHeader.idDelta = 0 477 subHeader.idRangeOffset = 2 478 subHeaderList.append(subHeader) 479 emptySubheadIndex = len(subHeaderList) - 1 480 for index in range(256): 481 if subHeaderKeys[index] == kEmptyTwoCharCodeRange: 482 subHeaderKeys[index] = emptySubheadIndex 483 # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the 484 # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray, 485 # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with 486 # charcode 0 and GID 0. 487 488 idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset. 489 subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2. 490 for index in range(subheadRangeLen): 491 subHeader = subHeaderList[index] 492 subHeader.idRangeOffset = 0 493 for j in range(index): 494 prevSubhead = subHeaderList[j] 495 if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray 496 subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8 497 subHeader.glyphIndexArray = [] 498 break 499 if subHeader.idRangeOffset == 0: # didn't find one. 500 subHeader.idRangeOffset = idRangeOffset 501 idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray. 502 else: 503 idRangeOffset = idRangeOffset - 8 # one less subheader 504 505 # Now we can write out the data! 506 length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array. 507 for subhead in subHeaderList[:-1]: 508 length = length + len(subhead.glyphIndexArray)*2 # We can't use subhead.entryCount, as some of the subhead may share subArrays. 509 dataList = [struct.pack(">HHH", 2, length, self.language)] 510 for index in subHeaderKeys: 511 dataList.append(struct.pack(">H", index*8)) 512 for subhead in subHeaderList: 513 dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)) 514 for subhead in subHeaderList[:-1]: 515 for gi in subhead.glyphIndexArray: 516 dataList.append(struct.pack(">H", gi)) 517 data = "".join(dataList) 518 assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length) 519 return data 520 521 522 def fromXML(self, name, attrs, content, ttFont): 523 self.language = safeEval(attrs["language"]) 524 if not hasattr(self, "cmap"): 525 self.cmap = {} 526 cmap = self.cmap 527 528 for element in content: 529 if not isinstance(element, TupleType): 530 continue 531 name, attrs, content = element 532 if name != "map": 533 continue 534 cmap[safeEval(attrs["code"])] = attrs["name"] 535 536 537cmap_format_4_format = ">7H" 538 539#uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF. 540#uint16 reservedPad # This value should be zero 541#uint16 startCode[segCount] # Starting character code for each segment 542#uint16 idDelta[segCount] # Delta for all character codes in segment 543#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0 544#uint16 glyphIndexArray[variable] # Glyph index array 545 546def splitRange(startCode, endCode, cmap): 547 # Try to split a range of character codes into subranges with consecutive 548 # glyph IDs in such a way that the cmap4 subtable can be stored "most" 549 # efficiently. I can't prove I've got the optimal solution, but it seems 550 # to do well with the fonts I tested: none became bigger, many became smaller. 551 if startCode == endCode: 552 return [], [endCode] 553 554 lastID = cmap[startCode] 555 lastCode = startCode 556 inOrder = None 557 orderedBegin = None 558 subRanges = [] 559 560 # Gather subranges in which the glyph IDs are consecutive. 561 for code in range(startCode + 1, endCode + 1): 562 glyphID = cmap[code] 563 564 if glyphID - 1 == lastID: 565 if inOrder is None or not inOrder: 566 inOrder = 1 567 orderedBegin = lastCode 568 else: 569 if inOrder: 570 inOrder = 0 571 subRanges.append((orderedBegin, lastCode)) 572 orderedBegin = None 573 574 lastID = glyphID 575 lastCode = code 576 577 if inOrder: 578 subRanges.append((orderedBegin, lastCode)) 579 assert lastCode == endCode 580 581 # Now filter out those new subranges that would only make the data bigger. 582 # A new segment cost 8 bytes, not using a new segment costs 2 bytes per 583 # character. 584 newRanges = [] 585 for b, e in subRanges: 586 if b == startCode and e == endCode: 587 break # the whole range, we're fine 588 if b == startCode or e == endCode: 589 threshold = 4 # split costs one more segment 590 else: 591 threshold = 8 # split costs two more segments 592 if (e - b + 1) > threshold: 593 newRanges.append((b, e)) 594 subRanges = newRanges 595 596 if not subRanges: 597 return [], [endCode] 598 599 if subRanges[0][0] != startCode: 600 subRanges.insert(0, (startCode, subRanges[0][0] - 1)) 601 if subRanges[-1][1] != endCode: 602 subRanges.append((subRanges[-1][1] + 1, endCode)) 603 604 # Fill the "holes" in the segments list -- those are the segments in which 605 # the glyph IDs are _not_ consecutive. 606 i = 1 607 while i < len(subRanges): 608 if subRanges[i-1][1] + 1 != subRanges[i][0]: 609 subRanges.insert(i, (subRanges[i-1][1] + 1, subRanges[i][0] - 1)) 610 i = i + 1 611 i = i + 1 612 613 # Transform the ranges into startCode/endCode lists. 614 start = [] 615 end = [] 616 for b, e in subRanges: 617 start.append(b) 618 end.append(e) 619 start.pop(0) 620 621 assert len(start) + 1 == len(end) 622 return start, end 623 624 625class cmap_format_4(CmapSubtable): 626 627 def decompile(self, data, ttFont): 628 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 629 # If not, someone is calling the subtable decompile() directly, and must provide both args. 630 if data != None and ttFont != None: 631 self.decompileHeader(self.data[offset:offset+int(length)], ttFont) 632 else: 633 assert (data == None and ttFont == None), "Need both data and ttFont arguments" 634 635 data = self.data # decompileHeader assigns the data after the header to self.data 636 (segCountX2, searchRange, entrySelector, rangeShift) = \ 637 struct.unpack(">4H", data[:8]) 638 data = data[8:] 639 segCount = segCountX2 / 2 640 641 allCodes = array.array("H") 642 allCodes.fromstring(data) 643 self.data = data = None 644 645 if sys.byteorder != "big": 646 allCodes.byteswap() 647 648 # divide the data 649 endCode = allCodes[:segCount] 650 allCodes = allCodes[segCount+1:] # the +1 is skipping the reservedPad field 651 startCode = allCodes[:segCount] 652 allCodes = allCodes[segCount:] 653 idDelta = allCodes[:segCount] 654 allCodes = allCodes[segCount:] 655 idRangeOffset = allCodes[:segCount] 656 glyphIndexArray = allCodes[segCount:] 657 lenGIArray = len(glyphIndexArray) 658 659 # build 2-byte character mapping 660 charCodes = [] 661 gids = [] 662 for i in range(len(startCode) - 1): # don't do 0xffff! 663 rangeCharCodes = list(range(startCode[i], endCode[i] + 1)) 664 charCodes = charCodes + rangeCharCodes 665 for charCode in rangeCharCodes: 666 rangeOffset = idRangeOffset[i] 667 if rangeOffset == 0: 668 glyphID = charCode + idDelta[i] 669 else: 670 # *someone* needs to get killed. 671 index = idRangeOffset[i] / 2 + (charCode - startCode[i]) + i - len(idRangeOffset) 672 assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray) 673 if glyphIndexArray[index] != 0: # if not missing glyph 674 glyphID = glyphIndexArray[index] + idDelta[i] 675 else: 676 glyphID = 0 # missing glyph 677 gids.append(glyphID % 0x10000) 678 679 self.cmap = cmap = {} 680 lenCmap = len(gids) 681 glyphOrder = self.ttFont.getGlyphOrder() 682 try: 683 names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids )) 684 except IndexError: 685 getGlyphName = self.ttFont.getGlyphName 686 names = list(map(getGlyphName, gids )) 687 list(map(operator.setitem, [cmap]*lenCmap, charCodes, names)) 688 689 690 691 def setIDDelta(self, idDelta): 692 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 693 # idDelta is a short, and must be between -32K and 32K 694 # startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1 695 # This means that we have a problem because we can need to assign to idDelta values 696 # between -(64K-2) and 64K -1. 697 # Since the final gi is reconstructed from the glyphArray GID by: 698 # (short)finalGID = (gid + idDelta) % 0x10000), 699 # we can get from a startCode of 0 to a final GID of 64 -1K by subtracting 1, and casting the 700 # negative number to an unsigned short. 701 # Similarly , we can get from a startCode of 64K-1 to a final GID of 1 by adding 2, because of 702 # the modulo arithmetic. 703 704 if idDelta > 0x7FFF: 705 idDelta = idDelta - 0x10000 706 elif idDelta < -0x7FFF: 707 idDelta = idDelta + 0x10000 708 709 return idDelta 710 711 712 def compile(self, ttFont): 713 if self.data: 714 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 715 716 from fontTools.ttLib.sfnt import maxPowerOfTwo 717 718 charCodes = self.cmap.keys() 719 lenCharCodes = len(charCodes) 720 if lenCharCodes == 0: 721 startCode = [0xffff] 722 endCode = [0xffff] 723 else: 724 charCodes.sort() 725 names = list(map(operator.getitem, [self.cmap]*lenCharCodes, charCodes)) 726 nameMap = ttFont.getReverseGlyphMap() 727 try: 728 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 729 except KeyError: 730 nameMap = ttFont.getReverseGlyphMap(rebuild=1) 731 try: 732 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 733 except KeyError: 734 # allow virtual GIDs in format 4 tables 735 gids = [] 736 for name in names: 737 try: 738 gid = nameMap[name] 739 except KeyError: 740 try: 741 if (name[:3] == 'gid'): 742 gid = eval(name[3:]) 743 else: 744 gid = ttFont.getGlyphID(name) 745 except: 746 raise KeyError(name) 747 748 gids.append(gid) 749 cmap = {} # code:glyphID mapping 750 list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids)) 751 752 # Build startCode and endCode lists. 753 # Split the char codes in ranges of consecutive char codes, then split 754 # each range in more ranges of consecutive/not consecutive glyph IDs. 755 # See splitRange(). 756 lastCode = charCodes[0] 757 endCode = [] 758 startCode = [lastCode] 759 for charCode in charCodes[1:]: # skip the first code, it's the first start code 760 if charCode == lastCode + 1: 761 lastCode = charCode 762 continue 763 start, end = splitRange(startCode[-1], lastCode, cmap) 764 startCode.extend(start) 765 endCode.extend(end) 766 startCode.append(charCode) 767 lastCode = charCode 768 endCode.append(lastCode) 769 startCode.append(0xffff) 770 endCode.append(0xffff) 771 772 # build up rest of cruft 773 idDelta = [] 774 idRangeOffset = [] 775 glyphIndexArray = [] 776 for i in range(len(endCode)-1): # skip the closing codes (0xffff) 777 indices = [] 778 for charCode in range(startCode[i], endCode[i] + 1): 779 indices.append(cmap[charCode]) 780 if (indices == list(range(indices[0], indices[0] + len(indices)))): 781 idDeltaTemp = self.setIDDelta(indices[0] - startCode[i]) 782 idDelta.append( idDeltaTemp) 783 idRangeOffset.append(0) 784 else: 785 # someone *definitely* needs to get killed. 786 idDelta.append(0) 787 idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i)) 788 glyphIndexArray.extend(indices) 789 idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef 790 idRangeOffset.append(0) 791 792 # Insane. 793 segCount = len(endCode) 794 segCountX2 = segCount * 2 795 maxExponent = maxPowerOfTwo(segCount) 796 searchRange = 2 * (2 ** maxExponent) 797 entrySelector = maxExponent 798 rangeShift = 2 * segCount - searchRange 799 800 charCodeArray = array.array("H", endCode + [0] + startCode) 801 idDeltaeArray = array.array("h", idDelta) 802 restArray = array.array("H", idRangeOffset + glyphIndexArray) 803 if sys.byteorder != "big": 804 charCodeArray.byteswap() 805 idDeltaeArray.byteswap() 806 restArray.byteswap() 807 data = charCodeArray.tostring() + idDeltaeArray.tostring() + restArray.tostring() 808 809 length = struct.calcsize(cmap_format_4_format) + len(data) 810 header = struct.pack(cmap_format_4_format, self.format, length, self.language, 811 segCountX2, searchRange, entrySelector, rangeShift) 812 return header + data 813 814 def fromXML(self, name, attrs, content, ttFont): 815 self.language = safeEval(attrs["language"]) 816 if not hasattr(self, "cmap"): 817 self.cmap = {} 818 cmap = self.cmap 819 820 for element in content: 821 if not isinstance(element, TupleType): 822 continue 823 nameMap, attrsMap, dummyContent = element 824 if nameMap != "map": 825 assert 0, "Unrecognized keyword in cmap subtable" 826 cmap[safeEval(attrsMap["code"])] = attrsMap["name"] 827 828 829class cmap_format_6(CmapSubtable): 830 831 def decompile(self, data, ttFont): 832 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 833 # If not, someone is calling the subtable decompile() directly, and must provide both args. 834 if data != None and ttFont != None: 835 self.decompileHeader(data[offset:offset+int(length)], ttFont) 836 else: 837 assert (data == None and ttFont == None), "Need both data and ttFont arguments" 838 839 data = self.data # decompileHeader assigns the data after the header to self.data 840 firstCode, entryCount = struct.unpack(">HH", data[:4]) 841 firstCode = int(firstCode) 842 data = data[4:] 843 #assert len(data) == 2 * entryCount # XXX not true in Apple's Helvetica!!! 844 glyphIndexArray = array.array("H") 845 glyphIndexArray.fromstring(data[:2 * int(entryCount)]) 846 if sys.byteorder != "big": 847 glyphIndexArray.byteswap() 848 self.data = data = None 849 850 self.cmap = cmap = {} 851 852 lenArray = len(glyphIndexArray) 853 charCodes = list(range(firstCode, firstCode + lenArray)) 854 glyphOrder = self.ttFont.getGlyphOrder() 855 try: 856 names = list(map(operator.getitem, [glyphOrder]*lenArray, glyphIndexArray )) 857 except IndexError: 858 getGlyphName = self.ttFont.getGlyphName 859 names = list(map(getGlyphName, glyphIndexArray )) 860 list(map(operator.setitem, [cmap]*lenArray, charCodes, names)) 861 862 def compile(self, ttFont): 863 if self.data: 864 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 865 cmap = self.cmap 866 codes = cmap.keys() 867 if codes: # yes, there are empty cmap tables. 868 codes = list(range(codes[0], codes[-1] + 1)) 869 firstCode = codes[0] 870 valueList = [cmap.get(code, ".notdef") for code in codes] 871 valueList = map(ttFont.getGlyphID, valueList) 872 glyphIndexArray = array.array("H", valueList) 873 if sys.byteorder != "big": 874 glyphIndexArray.byteswap() 875 data = glyphIndexArray.tostring() 876 else: 877 data = "" 878 firstCode = 0 879 header = struct.pack(">HHHHH", 880 6, len(data) + 10, self.language, firstCode, len(codes)) 881 return header + data 882 883 def fromXML(self, name, attrs, content, ttFont): 884 self.language = safeEval(attrs["language"]) 885 if not hasattr(self, "cmap"): 886 self.cmap = {} 887 cmap = self.cmap 888 889 for element in content: 890 if not isinstance(element, TupleType): 891 continue 892 name, attrs, content = element 893 if name != "map": 894 continue 895 cmap[safeEval(attrs["code"])] = attrs["name"] 896 897 898class cmap_format_12_or_13(CmapSubtable): 899 900 def __init__(self, format): 901 self.format = format 902 self.reserved = 0 903 self.data = None 904 self.ttFont = None 905 906 def decompileHeader(self, data, ttFont): 907 format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16]) 908 assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length) 909 self.format = format 910 self.reserved = reserved 911 self.length = length 912 self.language = language 913 self.nGroups = nGroups 914 self.data = data[16:] 915 self.ttFont = ttFont 916 917 def decompile(self, data, ttFont): 918 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 919 # If not, someone is calling the subtable decompile() directly, and must provide both args. 920 if data != None and ttFont != None: 921 self.decompileHeader(data[offset:offset+int(length)], ttFont) 922 else: 923 assert (data == None and ttFont == None), "Need both data and ttFont arguments" 924 925 data = self.data # decompileHeader assigns the data after the header to self.data 926 charCodes = [] 927 gids = [] 928 pos = 0 929 for i in range(self.nGroups): 930 startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] ) 931 pos += 12 932 lenGroup = 1 + endCharCode - startCharCode 933 charCodes += list(range(startCharCode, endCharCode +1)) 934 gids += self._computeGIDs(glyphID, lenGroup) 935 self.data = data = None 936 self.cmap = cmap = {} 937 lenCmap = len(gids) 938 glyphOrder = self.ttFont.getGlyphOrder() 939 try: 940 names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids )) 941 except IndexError: 942 getGlyphName = self.ttFont.getGlyphName 943 names = list(map(getGlyphName, gids )) 944 list(map(operator.setitem, [cmap]*lenCmap, charCodes, names)) 945 946 def compile(self, ttFont): 947 if self.data: 948 return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data 949 charCodes = self.cmap.keys() 950 lenCharCodes = len(charCodes) 951 names = self.cmap.values() 952 nameMap = ttFont.getReverseGlyphMap() 953 try: 954 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 955 except KeyError: 956 nameMap = ttFont.getReverseGlyphMap(rebuild=1) 957 try: 958 gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names)) 959 except KeyError: 960 # allow virtual GIDs in format 12 tables 961 gids = [] 962 for name in names: 963 try: 964 gid = nameMap[name] 965 except KeyError: 966 try: 967 if (name[:3] == 'gid'): 968 gid = eval(name[3:]) 969 else: 970 gid = ttFont.getGlyphID(name) 971 except: 972 raise KeyError(name) 973 974 gids.append(gid) 975 976 cmap = {} # code:glyphID mapping 977 list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids)) 978 979 charCodes.sort() 980 index = 0 981 startCharCode = charCodes[0] 982 startGlyphID = cmap[startCharCode] 983 lastGlyphID = startGlyphID - self._format_step 984 lastCharCode = startCharCode - 1 985 nGroups = 0 986 dataList = [] 987 maxIndex = len(charCodes) 988 for index in range(maxIndex): 989 charCode = charCodes[index] 990 glyphID = cmap[charCode] 991 if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode): 992 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 993 startCharCode = charCode 994 startGlyphID = glyphID 995 nGroups = nGroups + 1 996 lastGlyphID = glyphID 997 lastCharCode = charCode 998 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 999 nGroups = nGroups + 1 1000 data = "".join(dataList) 1001 lengthSubtable = len(data) +16 1002 assert len(data) == (nGroups*12) == (lengthSubtable-16) 1003 return struct.pack(">HHLLL", self.format, self.reserved , lengthSubtable, self.language, nGroups) + data 1004 1005 def toXML(self, writer, ttFont): 1006 writer.begintag(self.__class__.__name__, [ 1007 ("platformID", self.platformID), 1008 ("platEncID", self.platEncID), 1009 ("format", self.format), 1010 ("reserved", self.reserved), 1011 ("length", self.length), 1012 ("language", self.language), 1013 ("nGroups", self.nGroups), 1014 ]) 1015 writer.newline() 1016 codes = sorted(self.cmap.items()) 1017 self._writeCodes(codes, writer) 1018 writer.endtag(self.__class__.__name__) 1019 writer.newline() 1020 1021 def fromXML(self, name, attrs, content, ttFont): 1022 self.format = safeEval(attrs["format"]) 1023 self.reserved = safeEval(attrs["reserved"]) 1024 self.length = safeEval(attrs["length"]) 1025 self.language = safeEval(attrs["language"]) 1026 self.nGroups = safeEval(attrs["nGroups"]) 1027 if not hasattr(self, "cmap"): 1028 self.cmap = {} 1029 cmap = self.cmap 1030 1031 for element in content: 1032 if not isinstance(element, TupleType): 1033 continue 1034 name, attrs, content = element 1035 if name != "map": 1036 continue 1037 cmap[safeEval(attrs["code"])] = attrs["name"] 1038 1039 1040class cmap_format_12(cmap_format_12_or_13): 1041 def __init__(self, format): 1042 cmap_format_12_or_13.__init__(self, format) 1043 self._format_step = 1 1044 1045 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1046 return list(range(startingGlyph, startingGlyph + numberOfGlyphs)) 1047 1048 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1049 return (glyphID == 1 + lastGlyphID) and (charCode == 1 + lastCharCode) 1050 1051 1052class cmap_format_13(cmap_format_12_or_13): 1053 def __init__(self, format): 1054 cmap_format_12_or_13.__init__(self, format) 1055 self._format_step = 0 1056 1057 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1058 return [startingGlyph] * numberOfGlyphs 1059 1060 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1061 return (glyphID == lastGlyphID) and (charCode == 1 + lastCharCode) 1062 1063 1064def cvtToUVS(threeByteString): 1065 if sys.byteorder != "big": 1066 data = "\0" +threeByteString 1067 else: 1068 data = threeByteString + "\0" 1069 val, = struct.unpack(">L", data) 1070 return val 1071 1072def cvtFromUVS(val): 1073 if sys.byteorder != "big": 1074 threeByteString = struct.pack(">L", val)[1:] 1075 else: 1076 threeByteString = struct.pack(">L", val)[:3] 1077 return threeByteString 1078 1079def cmpUVSListEntry(first, second): 1080 uv1, glyphName1 = first 1081 uv2, glyphName2 = second 1082 1083 if (glyphName1 == None) and (glyphName2 != None): 1084 return -1 1085 elif (glyphName2 == None) and (glyphName1 != None): 1086 return 1 1087 1088 ret = cmp(uv1, uv2) 1089 if ret: 1090 return ret 1091 return cmp(glyphName1, glyphName2) 1092 1093 1094class cmap_format_14(CmapSubtable): 1095 1096 def decompileHeader(self, data, ttFont): 1097 format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10]) 1098 self.data = data[10:] 1099 self.length = length 1100 self.numVarSelectorRecords = numVarSelectorRecords 1101 self.ttFont = ttFont 1102 self.language = 0xFF # has no language. 1103 1104 def decompile(self, data, ttFont): 1105 if data != None and ttFont != None and ttFont.lazy: 1106 self.decompileHeader(data, ttFont) 1107 else: 1108 assert (data == None and ttFont == None), "Need both data and ttFont arguments" 1109 data = self.data 1110 1111 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail. 1112 uvsDict = {} 1113 recOffset = 0 1114 for n in range(self.numVarSelectorRecords): 1115 uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11]) 1116 recOffset += 11 1117 varUVS = cvtToUVS(uvs) 1118 if defOVSOffset: 1119 startOffset = defOVSOffset - 10 1120 numValues, = struct.unpack(">L", data[startOffset:startOffset+4]) 1121 startOffset +=4 1122 for r in range(numValues): 1123 uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4]) 1124 startOffset += 4 1125 firstBaseUV = cvtToUVS(uv) 1126 cnt = addtlCnt+1 1127 baseUVList = list(range(firstBaseUV, firstBaseUV+cnt)) 1128 glyphList = [None]*cnt 1129 localUVList = zip(baseUVList, glyphList) 1130 try: 1131 uvsDict[varUVS].extend(localUVList) 1132 except KeyError: 1133 uvsDict[varUVS] = list(localUVList) 1134 1135 if nonDefUVSOffset: 1136 startOffset = nonDefUVSOffset - 10 1137 numRecs, = struct.unpack(">L", data[startOffset:startOffset+4]) 1138 startOffset +=4 1139 localUVList = [] 1140 for r in range(numRecs): 1141 uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5]) 1142 startOffset += 5 1143 uv = cvtToUVS(uv) 1144 glyphName = self.ttFont.getGlyphName(gid) 1145 localUVList.append( [uv, glyphName] ) 1146 try: 1147 uvsDict[varUVS].extend(localUVList) 1148 except KeyError: 1149 uvsDict[varUVS] = localUVList 1150 1151 self.uvsDict = uvsDict 1152 1153 def toXML(self, writer, ttFont): 1154 writer.begintag(self.__class__.__name__, [ 1155 ("platformID", self.platformID), 1156 ("platEncID", self.platEncID), 1157 ("format", self.format), 1158 ("length", self.length), 1159 ("numVarSelectorRecords", self.numVarSelectorRecords), 1160 ]) 1161 writer.newline() 1162 uvsDict = self.uvsDict 1163 uvsList = sorted(uvsDict.keys()) 1164 for uvs in uvsList: 1165 uvList = uvsDict[uvs] 1166 uvList.sort(cmpUVSListEntry) 1167 for uv, gname in uvList: 1168 if gname == None: 1169 gname = "None" 1170 # I use the arg rather than th keyword syntax in order to preserve the attribute order. 1171 writer.simpletag("map", [ ("uvs",hex(uvs)), ("uv",hex(uv)), ("name", gname)] ) 1172 writer.newline() 1173 writer.endtag(self.__class__.__name__) 1174 writer.newline() 1175 1176 def fromXML(self, name, attrs, content, ttFont): 1177 self.format = safeEval(attrs["format"]) 1178 self.length = safeEval(attrs["length"]) 1179 self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"]) 1180 self.language = 0xFF # provide a value so that CmapSubtable.__cmp__() won't fail 1181 if not hasattr(self, "cmap"): 1182 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail. 1183 if not hasattr(self, "uvsDict"): 1184 self.uvsDict = {} 1185 uvsDict = self.uvsDict 1186 1187 for element in content: 1188 if not isinstance(element, TupleType): 1189 continue 1190 name, attrs, content = element 1191 if name != "map": 1192 continue 1193 uvs = safeEval(attrs["uvs"]) 1194 uv = safeEval(attrs["uv"]) 1195 gname = attrs["name"] 1196 if gname == "None": 1197 gname = None 1198 try: 1199 uvsDict[uvs].append( [uv, gname]) 1200 except KeyError: 1201 uvsDict[uvs] = [ [uv, gname] ] 1202 1203 1204 def compile(self, ttFont): 1205 if self.data: 1206 return struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) + self.data 1207 1208 uvsDict = self.uvsDict 1209 uvsList = sorted(uvsDict.keys()) 1210 self.numVarSelectorRecords = len(uvsList) 1211 offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block. 1212 data = [] 1213 varSelectorRecords =[] 1214 for uvs in uvsList: 1215 entryList = uvsDict[uvs] 1216 1217 defList = [entry for entry in entryList if entry[1] == None] 1218 if defList: 1219 defList = [entry[0] for entry in defList] 1220 defOVSOffset = offset 1221 defList.sort() 1222 1223 lastUV = defList[0] 1224 cnt = -1 1225 defRecs = [] 1226 for defEntry in defList: 1227 cnt +=1 1228 if (lastUV+cnt) != defEntry: 1229 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1) 1230 lastUV = defEntry 1231 defRecs.append(rec) 1232 cnt = 0 1233 1234 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt) 1235 defRecs.append(rec) 1236 1237 numDefRecs = len(defRecs) 1238 data.append(struct.pack(">L", numDefRecs)) 1239 data.extend(defRecs) 1240 offset += 4 + numDefRecs*4 1241 else: 1242 defOVSOffset = 0 1243 1244 ndefList = [entry for entry in entryList if entry[1] != None] 1245 if ndefList: 1246 nonDefUVSOffset = offset 1247 ndefList.sort() 1248 numNonDefRecs = len(ndefList) 1249 data.append(struct.pack(">L", numNonDefRecs)) 1250 offset += 4 + numNonDefRecs*5 1251 1252 for uv, gname in ndefList: 1253 gid = ttFont.getGlyphID(gname) 1254 ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid) 1255 data.append(ndrec) 1256 else: 1257 nonDefUVSOffset = 0 1258 1259 vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset) 1260 varSelectorRecords.append(vrec) 1261 1262 data = "".join(varSelectorRecords) + "".join(data) 1263 self.length = 10 + len(data) 1264 headerdata = struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) 1265 self.data = headerdata + data 1266 1267 return self.data 1268 1269 1270class cmap_format_unknown(CmapSubtable): 1271 1272 def toXML(self, writer, ttFont): 1273 cmapName = self.__class__.__name__[:12] + str(self.format) 1274 writer.begintag(cmapName, [ 1275 ("platformID", self.platformID), 1276 ("platEncID", self.platEncID), 1277 ]) 1278 writer.newline() 1279 writer.dumphex(self.data) 1280 writer.endtag(cmapName) 1281 writer.newline() 1282 1283 def fromXML(self, name, attrs, content, ttFont): 1284 self.data = readHex(content) 1285 self.cmap = {} 1286 1287 def decompileHeader(self, data, ttFont): 1288 self.language = 0 # dummy value 1289 self.data = data 1290 1291 def decompile(self, data, ttFont): 1292 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 1293 # If not, someone is calling the subtable decompile() directly, and must provide both args. 1294 if data != None and ttFont != None: 1295 self.decompileHeader(data[offset:offset+int(length)], ttFont) 1296 else: 1297 assert (data == None and ttFont == None), "Need both data and ttFont arguments" 1298 1299 def compile(self, ttFont): 1300 if self.data: 1301 return self.data 1302 else: 1303 return None 1304 1305cmap_classes = { 1306 0: cmap_format_0, 1307 2: cmap_format_2, 1308 4: cmap_format_4, 1309 6: cmap_format_6, 1310 12: cmap_format_12, 1311 13: cmap_format_13, 1312 14: cmap_format_14, 1313 } 1314