# _c_m_a_p.py revision 3a9fd301808f5a8991ca9ac44028d1ecb22d307f
import sys
from . import DefaultTable
import struct
import array
import operator
from fontTools import ttLib
from fontTools.misc.textTools import safeEval, readHex
from types import TupleType


class table__c_m_a_p(DefaultTable.DefaultTable):
    """The 'cmap' table: a version number plus a list of subtables.

    ``self.tables`` holds the subtable objects. Each one carries
    ``platformID``, ``platEncID`` and (once decompiled) a ``cmap`` dict that
    maps character codes to glyph names.
    """

    def getcmap(self, platformID, platEncID):
        """Return the first subtable with the given IDs, or None."""
        for subtable in self.tables:
            if (subtable.platformID == platformID and
                    subtable.platEncID == platEncID):
                return subtable
        return None  # not found

    def decompile(self, data, ttFont):
        """Parse the table header and the header of every subtable.

        Subtables whose reported length is zero are reported on stdout and
        skipped. Subtables that share a data offset share one ``cmap`` dict.
        """
        tableVersion, numSubTables = struct.unpack(">HH", data[:4])
        self.tableVersion = int(tableVersion)
        self.tables = tables = []
        seenOffsets = {}  # data offset -> index into `tables`
        for i in range(numSubTables):
            platformID, platEncID, offset = struct.unpack(
                ">HHl", data[4+i*8:4+(i+1)*8])
            platformID, platEncID = int(platformID), int(platEncID)
            format, length = struct.unpack(">HH", data[offset:offset+4])
            # Formats 8-13 and 14 use a wider length field; re-read the header.
            if format in [8, 10, 12, 13]:
                format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
            elif format in [14]:
                format, length = struct.unpack(">HL", data[offset:offset+6])

            if not length:
                # A parenthesized single argument keeps this line valid as a
                # Python 2 print statement.
                print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s, format %s offset %s. Skipping table." % (platformID, platEncID, format, offset))
                continue
            if format not in cmap_classes:
                table = cmap_format_unknown(format)
            else:
                table = cmap_classes[format](format)
            table.platformID = platformID
            table.platEncID = platEncID
            # Note that by default we decompile only the subtable header info;
            # any other data gets decompiled only when an attribute of the
            # subtable is referenced.
            table.decompileHeader(data[offset:offset+int(length)], ttFont)
            if offset in seenOffsets:
                table.cmap = tables[seenOffsets[offset]].cmap
            else:
                # Store the position this subtable is about to take in
                # `tables`, not the record index `i`: zero-length subtables
                # are skipped above, so the two numbers can differ.
                seenOffsets[offset] = len(tables)
            tables.append(table)

    def compile(self, ttFont):
        """Return the binary table; identical subtables are written once."""
        self.tables.sort()  # sort according to the spec; see CmapSubtable.__cmp__()
        numSubTables = len(self.tables)
        totalOffset = 4 + 8 * numSubTables
        data = struct.pack(">HH", self.tableVersion, numSubTables)
        tableData = ""
        seen = {}  # Some tables are the same object reference. Don't compile them twice.
        done = {}  # Some tables are different objects, but compile to the same data chunk
        for table in self.tables:
            try:
                offset = seen[id(table.cmap)]
            except KeyError:
                chunk = table.compile(ttFont)
                if chunk in done:
                    offset = done[chunk]
                else:
                    offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
                    tableData = tableData + chunk
            data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
        return data + tableData

    def toXML(self, writer, ttFont):
        """Write the table version followed by every subtable."""
        writer.simpletag("tableVersion", version=self.tableVersion)
        writer.newline()
        for table in self.tables:
            table.toXML(writer, ttFont)

    def fromXML(self, name, attrs, content, ttFont):
        """Handle a ``tableVersion`` or ``cmap_format_<n>`` element; ignore others."""
        if name == "tableVersion":
            self.tableVersion = safeEval(attrs["version"])
            return
        if name[:12] != "cmap_format_":
            return
        if not hasattr(self, "tables"):
            self.tables = []
        format = safeEval(name[12:])
        if format not in cmap_classes:
            table = cmap_format_unknown(format)
        else:
            table = cmap_classes[format](format)
        table.platformID = safeEval(attrs["platformID"])
        table.platEncID = safeEval(attrs["platEncID"])
        table.fromXML(name, attrs, content, ttFont)
        self.tables.append(table)


class CmapSubtable:
    """Base class for cmap subtables, with lazy decompilation.

    ``decompileHeader`` stores the bytes after the header in ``self.data``;
    the first access to a missing attribute (typically ``cmap``) triggers
    ``decompile`` on that saved data.
    """

    def __init__(self, format):
        self.format = format
        self.data = None
        self.ttFont = None

    def __getattr__(self, attr):
        # allow lazy decompilation of subtables.
        if attr[:2] == '__':  # don't handle requests for member functions like '__lt__'
            raise AttributeError(attr)
        if self.data is None:
            raise AttributeError(attr)
        self.decompile(None, None)  # use saved data.
        # Once this table has been decompiled, make sure we don't just return
        # the original data. Also avoids recursion when called with an
        # attribute that the cmap subtable doesn't have.
        self.data = None
        return getattr(self, attr)

    def decompileHeader(self, data, ttFont):
        """Parse the 6-byte header; ``data`` must be exactly one subtable."""
        format, length, language = struct.unpack(">HHH", data[:6])
        assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
        self.format = int(format)
        self.length = int(length)
        self.language = int(language)
        self.data = data[6:]
        self.ttFont = ttFont

    def toXML(self, writer, ttFont):
        """Write this subtable as one element with sorted ``map`` children."""
        writer.begintag(self.__class__.__name__, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("language", self.language),
        ])
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def _writeCodes(self, codes, writer):
        """Write one ``map`` tag per (code, name) pair.

        For platform 0 and for platform 3 with encoding 1 or 10, the entry
        from ``fontTools.unicode.Unicode`` is added as a comment.
        """
        isUnicode = ((self.platformID, self.platEncID) in ((3, 1), (3, 10))
                     or self.platformID == 0)
        if isUnicode:
            from fontTools.unicode import Unicode
        for code, name in codes:
            writer.simpletag("map", code=hex(code), name=name)
            if isUnicode:
                writer.comment(Unicode[code])
            writer.newline()

    def __cmp__(self, other):
        if type(self) != type(other): return cmp(type(self), type(other))

        # implemented so that list.sort() sorts according to the cmap spec.
        selfTuple = (
            getattr(self, "platformID", None),
            getattr(self, "platEncID", None),
            getattr(self, "language", None),
            self.__dict__)
        otherTuple = (
            getattr(other, "platformID", None),
            getattr(other, "platEncID", None),
            getattr(other, "language", None),
            other.__dict__)
        return cmp(selfTuple, otherTuple)


class cmap_format_0(CmapSubtable):
    """Format 0 subtable: one glyph-ID byte for each of 256 character codes."""

    def decompile(self, data, ttFont):
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            # `data` is the complete subtable, header included. This call used
            # to slice it with `offset` and `length`, which are not defined in
            # this scope and raised NameError.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        # decompileHeader assigns the data after the header to self.data
        assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
        glyphIdArray = array.array("B")
        glyphIdArray.fromstring(self.data)
        self.cmap = cmap = {}
        getGlyphName = self.ttFont.getGlyphName
        for charCode, glyphID in enumerate(glyphIdArray):
            cmap[charCode] = getGlyphName(glyphID)

    def compile(self, ttFont):
        """Return the 262-byte subtable; ``self.cmap`` must cover codes 0..255."""
        if self.data:
            return struct.pack(">HHH", 0, 262, self.language) + self.data

        charCodeList = sorted(self.cmap.items())
        charCodes = [entry[0] for entry in charCodeList]
        valueList = [entry[1] for entry in charCodeList]
        assert charCodes == list(range(256))
        valueList = [ttFont.getGlyphID(glyphName) for glyphName in valueList]

        glyphIdArray = array.array("B", valueList)
        data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring()
        assert len(data) == 262
        return data

    def fromXML(self, name, attrs, content, ttFont):
        """Read ``language`` and add every ``map`` child to ``self.cmap``."""
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap
        for element in content:
            if not isinstance(element, tuple):
                continue
            name, attrs, content = element
            if name != "map":
                continue
            cmap[safeEval(attrs["code"])] = attrs["name"]


subHeaderFormat = ">HHhH"
class SubHeader:
    """One format 2 subheader plus the glyph-index subrange it refers to."""

    def __init__(self):
        self.firstCode = None
        self.entryCount = None
        self.idDelta = None
        self.idRangeOffset = None
        self.glyphIndexArray = []

class cmap_format_2(CmapSubtable):
    """Format 2 subtable: mixed one-byte / two-byte character codes."""

    def setIDDelta(self, subHeader):
        """Choose ``subHeader.idDelta`` and rebase its glyph indices in place."""
        subHeader.idDelta = 0
        # find the minGI which is not zero.
        minGI = subHeader.glyphIndexArray[0]
        for gid in subHeader.glyphIndexArray:
            if (gid != 0) and (gid < minGI):
                minGI = gid
        # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
        # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
        # We would like to pick an idDelta such that the first glyphArray GID is 1,
        # so that we are more likely to be able to combine glypharray GID subranges.
        # This means that we have a problem when minGI is > 32K
        # Since the final gi is reconstructed from the glyphArray GID by:
        #    (short)finalGID = (gid + idDelta) % 0x10000),
        # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
        # negative number to an unsigned short.

        if (minGI > 1):
            if minGI > 0x7FFF:
                subHeader.idDelta = -(0x10000 - minGI) - 1
            else:
                subHeader.idDelta = minGI - 1
            idDelta = subHeader.idDelta
            for i in range(subHeader.entryCount):
                gid = subHeader.glyphIndexArray[i]
                if gid > 0:
                    subHeader.glyphIndexArray[i] = gid - idDelta

    def decompile(self, data, ttFont):
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            # `data` is the complete subtable, header included. This call used
            # to slice it with `offset` and `length`, which are not defined in
            # this scope and raised NameError.
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        # get the key array, and determine the number of subHeaders.
        allKeys = array.array("H")
        allKeys.fromstring(data[:512])
        data = data[512:]
        if sys.byteorder != "big":
            allKeys.byteswap()
        # Keys are byte offsets into the subheader array; 8 bytes per subheader.
        subHeaderKeys = [key // 8 for key in allKeys]
        maxSubHeaderindex = max(subHeaderKeys)

        # Load subHeaders
        subHeaderList = []
        pos = 0
        for i in range(maxSubHeaderindex + 1):
            subHeader = SubHeader()
            (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta,
             subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
            pos += 8
            giDataPos = pos + subHeader.idRangeOffset - 2
            giList = array.array("H")
            giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount*2])
            if sys.byteorder != "big":
                giList.byteswap()
            subHeader.glyphIndexArray = giList
            subHeaderList.append(subHeader)
        # How this gets processed.
        # Charcodes may be one or two bytes.
        # The first byte of a charcode is mapped through the subHeaderKeys, to select
        # a subHeader. For any subheader but 0, the next byte is then mapped through the
        # selected subheader. If subheader Index 0 is selected, then the byte itself is
        # mapped through the subheader, and there is no second byte.
        # Then assume that the subsequent byte is the first byte of the next charcode, and repeat.
        #
        # Each subheader references a range in the glyphIndexArray whose length is entryCount.
        # The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
        # referenced by another subheader.
        # The only subheader that will be referenced by more than one first-byte value is the subheader
        # that maps the entire range of glyphID values to glyphIndex 0, e.g. notdef:
        #    {firstChar 0, EntryCount 0, idDelta 0, idRangeOffset xx}
        # A byte being mapped through a subheader is treated as an index into a mapping of array index to font glyphIndex.
        # A subheader specifies a subrange within (0...256) by the
        # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
        # (e.g. glyph not in font).
        # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
        # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
        # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
        # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
        # Example for Logocut-Medium
        # first byte of charcode = 129; selects subheader 1.
        # subheader 1 = {firstChar 64, EntryCount 108, idDelta 42, idRangeOffset 0252}
        # second byte of charCode = 66
        # the index offset = 66-64 = 2.
        # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
        # [glyphIndexArray index], [subrange array index] = glyphIndex
        # [256], [0]=1  from charcode [129, 64]
        # [257], [1]=2  from charcode [129, 65]
        # [258], [2]=3  from charcode [129, 66]
        # [259], [3]=4  from charcode [129, 67]
        # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
        # add it to the glyphID to get the final glyphIndex
        # value. In this case the final glyph index = 3 + 42 -> 45 for the final glyphIndex. Whew!

        self.data = ""
        self.cmap = cmap = {}
        for firstByte in range(256):
            subHeadindex = subHeaderKeys[firstByte]
            subHeader = subHeaderList[subHeadindex]
            if subHeadindex == 0:
                if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
                    continue  # gi is notdef.
                else:
                    charCode = firstByte
                    offsetIndex = firstByte - subHeader.firstCode
                    gi = subHeader.glyphIndexArray[offsetIndex]
                    if gi != 0:
                        gi = (gi + subHeader.idDelta) % 0x10000
                    else:
                        continue  # gi is notdef.
                cmap[charCode] = gi
            else:
                if subHeader.entryCount:
                    charCodeOffset = firstByte * 256 + subHeader.firstCode
                    for offsetIndex in range(subHeader.entryCount):
                        charCode = charCodeOffset + offsetIndex
                        gi = subHeader.glyphIndexArray[offsetIndex]
                        if gi != 0:
                            gi = (gi + subHeader.idDelta) % 0x10000
                        else:
                            continue
                        cmap[charCode] = gi
                # If not subHeader.entryCount, then all char codes with this first byte are
                # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
                # same as mapping it to .notdef.
        # cmap values are GID's; replace them with glyph names.
        glyphOrder = self.ttFont.getGlyphOrder()
        charCodes = list(cmap.keys())
        gids = [cmap[charCode] for charCode in charCodes]
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # At least one GID is beyond the glyph order; ask the font instead.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        for charCode, glyphName in zip(charCodes, names):
            cmap[charCode] = glyphName

    def compile(self, ttFont):
        """Return the binary format 2 subtable built from ``self.cmap``.

        If undecompiled data is still held, it is written back unchanged.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        kEmptyTwoCharCodeRange = -1
        notdefGI = 0

        items = sorted(self.cmap.items())
        charCodes = [item[0] for item in items]
        names = [item[1] for item in items]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[glyphName] for glyphName in names]
        except KeyError:
            nameMap = ttFont.getReverseGlyphMap(rebuild=1)
            try:
                gids = [nameMap[glyphName] for glyphName in names]
            except KeyError:
                # allow virtual GIDs in format 2 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if (name[:3] == 'gid'):
                                # NOTE(review): eval() runs whatever follows
                                # "gid" in the glyph name; unsafe if names can
                                # come from untrusted input.
                                gid = eval(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            raise KeyError(name)

                    gids.append(gid)

        # Process the (char code to gid) item list in char code order.
        # By definition, all one byte char codes map to subheader 0.
        # For all the two byte char codes, we assume that the first byte maps to the empty subhead (with an entry count of 0,
        # which defines all char codes in its range to map to notdef) unless proven otherwise.
        # Note that since the char code items are processed in char code order, all the char codes with the
        # same first byte are in sequential order.

        subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)]  # list of indices into subHeaderList.
        subHeaderList = []

        # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up
        # with a cmap where all the one byte char codes map to notdef,
        # with the result that the subhead 0 would not get created just by processing the item list.
        charCode = charCodes[0]
        if charCode > 255:
            subHeader = SubHeader()
            subHeader.firstCode = 0
            subHeader.entryCount = 0
            subHeader.idDelta = 0
            subHeader.idRangeOffset = 0
            subHeaderList.append(subHeader)

        lastFirstByte = -1
        items = zip(charCodes, gids)
        for charCode, gid in items:
            if gid == 0:
                continue
            firstbyte = charCode >> 8
            secondByte = charCode & 0x00FF

            if firstbyte != lastFirstByte:  # Need to update the current subhead, and start a new one.
                if lastFirstByte > -1:
                    # fix GI's and iDelta of current subheader.
                    self.setIDDelta(subHeader)

                    # If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
                    # for the indices matching the char codes.
                    if lastFirstByte == 0:
                        for index in range(subHeader.entryCount):
                            charCode = subHeader.firstCode + index
                            subHeaderKeys[charCode] = 0

                    assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
                # init new subheader
                subHeader = SubHeader()
                subHeader.firstCode = secondByte
                subHeader.entryCount = 1
                subHeader.glyphIndexArray.append(gid)
                subHeaderList.append(subHeader)
                subHeaderKeys[firstbyte] = len(subHeaderList) - 1
                lastFirstByte = firstbyte
            else:
                # need to fill in with notdefs all the code points between the last charCode and the current charCode.
                codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
                for i in range(codeDiff):
                    subHeader.glyphIndexArray.append(notdefGI)
                subHeader.glyphIndexArray.append(gid)
                subHeader.entryCount = subHeader.entryCount + codeDiff + 1

        # fix GI's and iDelta of last subheader that we added to the subheader array.
        self.setIDDelta(subHeader)

        # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
        subHeader = SubHeader()
        subHeader.firstCode = 0
        subHeader.entryCount = 0
        subHeader.idDelta = 0
        subHeader.idRangeOffset = 2
        subHeaderList.append(subHeader)
        emptySubheadIndex = len(subHeaderList) - 1
        for index in range(256):
            if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
                subHeaderKeys[index] = emptySubheadIndex
        # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
        # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
        # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
        # charcode 0 and GID 0.

        idRangeOffset = (len(subHeaderList)-1)*8 + 2  # offset to beginning of glyphIDArray from first subheader idRangeOffset.
        subheadRangeLen = len(subHeaderList) - 1  # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
        for index in range(subheadRangeLen):
            subHeader = subHeaderList[index]
            subHeader.idRangeOffset = 0
            for j in range(index):
                prevSubhead = subHeaderList[j]
                if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray:  # use the glyphIndexArray subarray
                    subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
                    subHeader.glyphIndexArray = []
                    break
            if subHeader.idRangeOffset == 0:  # didn't find one.
                subHeader.idRangeOffset = idRangeOffset
                idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2  # one less subheader, one more subArray.
            else:
                idRangeOffset = idRangeOffset - 8  # one less subheader

        # Now we can write out the data!
        length = 6 + 512 + 8*len(subHeaderList)  # header, 256 subHeaderKeys, and subheader array.
        for subhead in subHeaderList[:-1]:
            length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
        dataList = [struct.pack(">HHH", 2, length, self.language)]
        for index in subHeaderKeys:
            dataList.append(struct.pack(">H", index*8))
        for subhead in subHeaderList:
            dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
        for subhead in subHeaderList[:-1]:
            for gi in subhead.glyphIndexArray:
                dataList.append(struct.pack(">H", gi))
        data = "".join(dataList)
        assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
        return data

    def fromXML(self, name, attrs, content, ttFont):
        """Read ``language`` and add every ``map`` child to ``self.cmap``."""
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            name, attrs, content = element
            if name != "map":
                continue
            cmap[safeEval(attrs["code"])] = attrs["name"]


cmap_format_4_format = ">7H"

#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
543#uint16 reservedPad # This value should be zero 544#uint16 startCode[segCount] # Starting character code for each segment 545#uint16 idDelta[segCount] # Delta for all character codes in segment 546#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0 547#uint16 glyphIndexArray[variable] # Glyph index array 548 549def splitRange(startCode, endCode, cmap): 550 # Try to split a range of character codes into subranges with consecutive 551 # glyph IDs in such a way that the cmap4 subtable can be stored "most" 552 # efficiently. I can't prove I've got the optimal solution, but it seems 553 # to do well with the fonts I tested: none became bigger, many became smaller. 554 if startCode == endCode: 555 return [], [endCode] 556 557 lastID = cmap[startCode] 558 lastCode = startCode 559 inOrder = None 560 orderedBegin = None 561 subRanges = [] 562 563 # Gather subranges in which the glyph IDs are consecutive. 564 for code in range(startCode + 1, endCode + 1): 565 glyphID = cmap[code] 566 567 if glyphID - 1 == lastID: 568 if inOrder is None or not inOrder: 569 inOrder = 1 570 orderedBegin = lastCode 571 else: 572 if inOrder: 573 inOrder = 0 574 subRanges.append((orderedBegin, lastCode)) 575 orderedBegin = None 576 577 lastID = glyphID 578 lastCode = code 579 580 if inOrder: 581 subRanges.append((orderedBegin, lastCode)) 582 assert lastCode == endCode 583 584 # Now filter out those new subranges that would only make the data bigger. 585 # A new segment cost 8 bytes, not using a new segment costs 2 bytes per 586 # character. 
587 newRanges = [] 588 for b, e in subRanges: 589 if b == startCode and e == endCode: 590 break # the whole range, we're fine 591 if b == startCode or e == endCode: 592 threshold = 4 # split costs one more segment 593 else: 594 threshold = 8 # split costs two more segments 595 if (e - b + 1) > threshold: 596 newRanges.append((b, e)) 597 subRanges = newRanges 598 599 if not subRanges: 600 return [], [endCode] 601 602 if subRanges[0][0] != startCode: 603 subRanges.insert(0, (startCode, subRanges[0][0] - 1)) 604 if subRanges[-1][1] != endCode: 605 subRanges.append((subRanges[-1][1] + 1, endCode)) 606 607 # Fill the "holes" in the segments list -- those are the segments in which 608 # the glyph IDs are _not_ consecutive. 609 i = 1 610 while i < len(subRanges): 611 if subRanges[i-1][1] + 1 != subRanges[i][0]: 612 subRanges.insert(i, (subRanges[i-1][1] + 1, subRanges[i][0] - 1)) 613 i = i + 1 614 i = i + 1 615 616 # Transform the ranges into startCode/endCode lists. 617 start = [] 618 end = [] 619 for b, e in subRanges: 620 start.append(b) 621 end.append(e) 622 start.pop(0) 623 624 assert len(start) + 1 == len(end) 625 return start, end 626 627 628class cmap_format_4(CmapSubtable): 629 630 def decompile(self, data, ttFont): 631 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 632 # If not, someone is calling the subtable decompile() directly, and must provide both args. 
633 if data != None and ttFont != None: 634 self.decompileHeader(self.data[offset:offset+int(length)], ttFont) 635 else: 636 assert (data == None and ttFont == None), "Need both data and ttFont arguments" 637 638 data = self.data # decompileHeader assigns the data after the header to self.data 639 (segCountX2, searchRange, entrySelector, rangeShift) = \ 640 struct.unpack(">4H", data[:8]) 641 data = data[8:] 642 segCount = segCountX2 / 2 643 644 allCodes = array.array("H") 645 allCodes.fromstring(data) 646 self.data = data = None 647 648 if sys.byteorder != "big": 649 allCodes.byteswap() 650 651 # divide the data 652 endCode = allCodes[:segCount] 653 allCodes = allCodes[segCount+1:] # the +1 is skipping the reservedPad field 654 startCode = allCodes[:segCount] 655 allCodes = allCodes[segCount:] 656 idDelta = allCodes[:segCount] 657 allCodes = allCodes[segCount:] 658 idRangeOffset = allCodes[:segCount] 659 glyphIndexArray = allCodes[segCount:] 660 lenGIArray = len(glyphIndexArray) 661 662 # build 2-byte character mapping 663 charCodes = [] 664 gids = [] 665 for i in range(len(startCode) - 1): # don't do 0xffff! 666 rangeCharCodes = range(startCode[i], endCode[i] + 1) 667 charCodes = charCodes + rangeCharCodes 668 for charCode in rangeCharCodes: 669 rangeOffset = idRangeOffset[i] 670 if rangeOffset == 0: 671 glyphID = charCode + idDelta[i] 672 else: 673 # *someone* needs to get killed. 674 index = idRangeOffset[i] / 2 + (charCode - startCode[i]) + i - len(idRangeOffset) 675 assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" 
% (i, index, lenGIArray) 676 if glyphIndexArray[index] != 0: # if not missing glyph 677 glyphID = glyphIndexArray[index] + idDelta[i] 678 else: 679 glyphID = 0 # missing glyph 680 gids.append(glyphID % 0x10000) 681 682 self.cmap = cmap = {} 683 lenCmap = len(gids) 684 glyphOrder = self.ttFont.getGlyphOrder() 685 try: 686 names = map(operator.getitem, [glyphOrder]*lenCmap, gids ) 687 except IndexError: 688 getGlyphName = self.ttFont.getGlyphName 689 names = map(getGlyphName, gids ) 690 map(operator.setitem, [cmap]*lenCmap, charCodes, names) 691 692 693 694 def setIDDelta(self, idDelta): 695 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 696 # idDelta is a short, and must be between -32K and 32K 697 # startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1 698 # This means that we have a problem because we can need to assign to idDelta values 699 # between -(64K-2) and 64K -1. 700 # Since the final gi is reconstructed from the glyphArray GID by: 701 # (short)finalGID = (gid + idDelta) % 0x10000), 702 # we can get from a startCode of 0 to a final GID of 64 -1K by subtracting 1, and casting the 703 # negative number to an unsigned short. 704 # Similarly , we can get from a startCode of 64K-1 to a final GID of 1 by adding 2, because of 705 # the modulo arithmetic. 
706 707 if idDelta > 0x7FFF: 708 idDelta = idDelta - 0x10000 709 elif idDelta < -0x7FFF: 710 idDelta = idDelta + 0x10000 711 712 return idDelta 713 714 715 def compile(self, ttFont): 716 if self.data: 717 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 718 719 from fontTools.ttLib.sfnt import maxPowerOfTwo 720 721 charCodes = self.cmap.keys() 722 lenCharCodes = len(charCodes) 723 if lenCharCodes == 0: 724 startCode = [0xffff] 725 endCode = [0xffff] 726 else: 727 charCodes.sort() 728 names = map(operator.getitem, [self.cmap]*lenCharCodes, charCodes) 729 nameMap = ttFont.getReverseGlyphMap() 730 try: 731 gids = map(operator.getitem, [nameMap]*lenCharCodes, names) 732 except KeyError: 733 nameMap = ttFont.getReverseGlyphMap(rebuild=1) 734 try: 735 gids = map(operator.getitem, [nameMap]*lenCharCodes, names) 736 except KeyError: 737 # allow virtual GIDs in format 4 tables 738 gids = [] 739 for name in names: 740 try: 741 gid = nameMap[name] 742 except KeyError: 743 try: 744 if (name[:3] == 'gid'): 745 gid = eval(name[3:]) 746 else: 747 gid = ttFont.getGlyphID(name) 748 except: 749 raise KeyError(name) 750 751 gids.append(gid) 752 cmap = {} # code:glyphID mapping 753 map(operator.setitem, [cmap]*len(charCodes), charCodes, gids) 754 755 # Build startCode and endCode lists. 756 # Split the char codes in ranges of consecutive char codes, then split 757 # each range in more ranges of consecutive/not consecutive glyph IDs. 758 # See splitRange(). 
759 lastCode = charCodes[0] 760 endCode = [] 761 startCode = [lastCode] 762 for charCode in charCodes[1:]: # skip the first code, it's the first start code 763 if charCode == lastCode + 1: 764 lastCode = charCode 765 continue 766 start, end = splitRange(startCode[-1], lastCode, cmap) 767 startCode.extend(start) 768 endCode.extend(end) 769 startCode.append(charCode) 770 lastCode = charCode 771 endCode.append(lastCode) 772 startCode.append(0xffff) 773 endCode.append(0xffff) 774 775 # build up rest of cruft 776 idDelta = [] 777 idRangeOffset = [] 778 glyphIndexArray = [] 779 for i in range(len(endCode)-1): # skip the closing codes (0xffff) 780 indices = [] 781 for charCode in range(startCode[i], endCode[i] + 1): 782 indices.append(cmap[charCode]) 783 if (indices == range(indices[0], indices[0] + len(indices))): 784 idDeltaTemp = self.setIDDelta(indices[0] - startCode[i]) 785 idDelta.append( idDeltaTemp) 786 idRangeOffset.append(0) 787 else: 788 # someone *definitely* needs to get killed. 789 idDelta.append(0) 790 idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i)) 791 glyphIndexArray.extend(indices) 792 idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef 793 idRangeOffset.append(0) 794 795 # Insane. 
796 segCount = len(endCode) 797 segCountX2 = segCount * 2 798 maxExponent = maxPowerOfTwo(segCount) 799 searchRange = 2 * (2 ** maxExponent) 800 entrySelector = maxExponent 801 rangeShift = 2 * segCount - searchRange 802 803 charCodeArray = array.array("H", endCode + [0] + startCode) 804 idDeltaeArray = array.array("h", idDelta) 805 restArray = array.array("H", idRangeOffset + glyphIndexArray) 806 if sys.byteorder != "big": 807 charCodeArray.byteswap() 808 idDeltaeArray.byteswap() 809 restArray.byteswap() 810 data = charCodeArray.tostring() + idDeltaeArray.tostring() + restArray.tostring() 811 812 length = struct.calcsize(cmap_format_4_format) + len(data) 813 header = struct.pack(cmap_format_4_format, self.format, length, self.language, 814 segCountX2, searchRange, entrySelector, rangeShift) 815 return header + data 816 817 def fromXML(self, name, attrs, content, ttFont): 818 self.language = safeEval(attrs["language"]) 819 if not hasattr(self, "cmap"): 820 self.cmap = {} 821 cmap = self.cmap 822 823 for element in content: 824 if type(element) != TupleType: 825 continue 826 nameMap, attrsMap, dummyContent = element 827 if nameMap != "map": 828 assert 0, "Unrecognized keyword in cmap subtable" 829 cmap[safeEval(attrsMap["code"])] = attrsMap["name"] 830 831 832class cmap_format_6(CmapSubtable): 833 834 def decompile(self, data, ttFont): 835 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 836 # If not, someone is calling the subtable decompile() directly, and must provide both args. 
class cmap_format_6(CmapSubtable):
    """cmap subtable format 6: one contiguous run of character codes.

    As written by compile(), the subtable is a ">HHHHH" header (format,
    length, language, firstCode, entryCount) followed by one big-endian
    uint16 glyph index per character code.
    """

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (character code -> glyph name).

        Either both arguments are None (the header was already parsed and
        the remaining bytes are in ``self.data``) or both are given, in
        which case ``data`` is the complete subtable.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            # 'data' already is this subtable's own data; the previous code
            # sliced it with undefined 'offset'/'length' names (NameError).
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        firstCode, entryCount = struct.unpack(">HH", data[:4])
        firstCode = int(firstCode)
        data = data[4:]
        #assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
        glyphIndexArray = array.array("H")
        glyphIndexArray.fromstring(data[:2 * int(entryCount)])
        if sys.byteorder != "big":
            glyphIndexArray.byteswap()
        self.data = data = None

        self.cmap = cmap = {}

        lenArray = len(glyphIndexArray)
        charCodes = range(firstCode, firstCode + lenArray)
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[glyphID] for glyphID in glyphIndexArray]
        except IndexError:
            # Some glyph index is not in the glyph order; fall back to
            # asking the font for each name.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(glyphID) for glyphID in glyphIndexArray]
        cmap.update(zip(charCodes, names))

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If the raw data was never decompiled (``self.data`` is still set)
        it is written back behind a freshly packed header.  Otherwise the
        table is rebuilt from ``self.cmap``; codes missing from the covered
        range are mapped to ".notdef".
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        cmap = self.cmap
        # Sort the codes: dict key order is arbitrary, and the range below
        # must run from the smallest to the largest mapped code.
        codes = sorted(cmap.keys())
        if codes:  # yes, there are empty cmap tables.
            codes = range(codes[0], codes[-1] + 1)
            firstCode = codes[0]
            glyphNames = [cmap.get(code, ".notdef") for code in codes]
            valueList = [ttFont.getGlyphID(glyphName) for glyphName in glyphNames]
            glyphIndexArray = array.array("H", valueList)
            if sys.byteorder != "big":
                glyphIndexArray.byteswap()
            data = glyphIndexArray.tostring()
        else:
            data = ""
            firstCode = 0
        header = struct.pack(">HHHHH",
                6, len(data) + 10, self.language, firstCode, len(codes))
        return header + data

    def fromXML(self, name, attrs, content, ttFont):
        """Merge the "map" child elements of the XML element into ``self.cmap``.

        Non-tuple content items and elements other than "map" are ignored.
        """
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            childName, childAttrs, childContent = element
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
class cmap_format_12_or_13(CmapSubtable):
    """Shared implementation of the group-based cmap formats 12 and 13.

    The subtable is a ">HHLLL" header (format, reserved, length, language,
    nGroups) followed by nGroups records of three big-endian uint32 values
    (startCharCode, endCharCode, glyphID).  Subclasses supply
    ``_format_step``, ``_computeGIDs()`` and ``_IsInSameRun()``, which
    define how the glyph IDs inside one group relate to each other.
    """

    def __init__(self, format):
        self.format = format
        self.reserved = 0
        self.data = None
        self.ttFont = None

    def decompileHeader(self, data, ttFont):
        """Parse the 16-byte header; keep the group records in ``self.data``."""
        format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
        assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
        self.format = format
        self.reserved = reserved
        self.length = length
        self.language = language
        self.nGroups = nGroups
        self.data = data[16:]
        self.ttFont = ttFont

    def decompile(self, data, ttFont):
        """Expand the group records into ``self.cmap`` (code -> glyph name).

        Either both arguments are None (header already parsed) or both are
        given, in which case ``data`` is the complete subtable.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            # 'data' already is this subtable's own data; the previous code
            # sliced it with undefined 'offset'/'length' names (NameError).
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        charCodes = []
        gids = []
        pos = 0
        for i in range(self.nGroups):
            startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
            pos += 12
            lenGroup = 1 + endCharCode - startCharCode
            charCodes.extend(range(startCharCode, endCharCode + 1))
            gids.extend(self._computeGIDs(glyphID, lenGroup))
        self.data = data = None
        self.cmap = cmap = {}
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # Some glyph ID is not in the glyph order; fall back to asking
            # the font for each name.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        cmap.update(zip(charCodes, names))

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If the raw data was never decompiled (``self.data`` is still set)
        it is written back behind a freshly packed header.  Otherwise the
        groups are rebuilt from ``self.cmap``.  An empty cmap produces a
        header with zero groups.

        Raises KeyError (with the glyph name) if a glyph name cannot be
        turned into a glyph ID.
        """
        if self.data:
            return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
        charCodes = list(self.cmap.keys())
        names = list(self.cmap.values())
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            nameMap = ttFont.getReverseGlyphMap(rebuild=1)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 12 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if name[:3] == 'gid':
                                # int() rather than eval(): the name may
                                # come from an XML file and must never be
                                # executed as code.
                                gid = int(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            raise KeyError(name)
                    gids.append(gid)

        cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

        charCodes.sort()
        nGroups = 0
        dataList = []
        if charCodes:
            startCharCode = charCodes[0]
            startGlyphID = cmap[startCharCode]
            # Seed the "previous" values so that the first code always
            # counts as the continuation of the first run.
            lastGlyphID = startGlyphID - self._format_step
            lastCharCode = startCharCode - 1
            for charCode in charCodes:
                glyphID = cmap[charCode]
                if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
                    dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
                    startCharCode = charCode
                    startGlyphID = glyphID
                    nGroups = nGroups + 1
                lastGlyphID = glyphID
                lastCharCode = charCode
            dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
            nGroups = nGroups + 1
        data = "".join(dataList)
        lengthSubtable = len(data) + 16
        assert len(data) == (nGroups*12) == (lengthSubtable-16)
        return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

    def toXML(self, writer, ttFont):
        """Write the header fields as attributes and the mappings sorted by code."""
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("format", self.format),
                ("reserved", self.reserved),
                ("length", self.length),
                ("language", self.language),
                ("nGroups", self.nGroups),
                ])
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Read the header attributes and merge the "map" children into ``self.cmap``.

        Non-tuple content items and elements other than "map" are ignored.
        """
        self.format = safeEval(attrs["format"])
        self.reserved = safeEval(attrs["reserved"])
        self.length = safeEval(attrs["length"])
        self.language = safeEval(attrs["language"])
        self.nGroups = safeEval(attrs["nGroups"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            childName, childAttrs, childContent = element
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]


class cmap_format_12(cmap_format_12_or_13):
    """Format 12: glyph IDs increase by one for each code within a group."""

    def __init__(self, format):
        cmap_format_12_or_13.__init__(self, format)
        self._format_step = 1

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        return range(startingGlyph, startingGlyph + numberOfGlyphs)

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        return (glyphID == 1 + lastGlyphID) and (charCode == 1 + lastCharCode)


class cmap_format_13(cmap_format_12_or_13):
    """Format 13: every code within a group maps to the same glyph ID."""

    def __init__(self, format):
        cmap_format_12_or_13.__init__(self, format)
        self._format_step = 0

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        return [startingGlyph] * numberOfGlyphs

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        return (glyphID == lastGlyphID) and (charCode == 1 + lastCharCode)
def cvtToUVS(threeByteString):
    """Return the integer stored in a 3-byte big-endian (uint24) string.

    The value is padded with a leading zero byte and unpacked with the
    explicitly big-endian ">L" format, so the result does not depend on
    the byte order of the host.  (Padding on the other side for big-endian
    hosts, as was done before, shifted the value left by eight bits.)
    """
    val, = struct.unpack(">L", b"\0" + threeByteString)
    return val


def cvtFromUVS(val):
    """Return *val* as a 3-byte big-endian (uint24) string.

    This is the inverse of cvtToUVS(): the value is packed as a big-endian
    uint32 and the most significant byte is dropped, on every host.
    """
    return struct.pack(">L", val)[1:]


def cmpUVSListEntry(first, second):
    """cmp-style comparison of two (unicode value, glyph name) entries.

    Entries whose glyph name is None sort before entries with a glyph
    name; otherwise entries are ordered by unicode value and then by
    glyph name.  Returns -1, 0 or 1.
    """
    uv1, glyphName1 = first
    uv2, glyphName2 = second

    if glyphName1 is None and glyphName2 is not None:
        return -1
    if glyphName2 is None and glyphName1 is not None:
        return 1

    # Spelled out instead of using the cmp() builtin so that two None
    # names are never ordered against each other.
    if uv1 != uv2:
        if uv1 < uv2:
            return -1
        return 1
    if glyphName1 == glyphName2:
        return 0
    if glyphName1 < glyphName2:
        return -1
    return 1
class cmap_format_14(CmapSubtable):
    """cmap subtable format 14: Unicode variation sequences.

    ``self.uvsDict`` maps a variation selector to a list of
    ``(unicode value, glyph name)`` entries; a glyph name of None marks a
    "default" entry, for which no glyph is stored in the subtable.
    """

    def decompileHeader(self, data, ttFont):
        """Parse the 10-byte header; keep the remaining bytes in ``self.data``."""
        format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
        self.data = data[10:]
        self.length = length
        self.numVarSelectorRecords = numVarSelectorRecords
        self.ttFont = ttFont
        self.language = 0xFF  # has no language.

    def decompile(self, data, ttFont):
        """Build ``self.uvsDict`` from the variation selector records.

        Either both arguments are None (header already parsed) or both are
        given, in which case ``data`` is the complete subtable.
        """
        # Previously the header was only parsed when ttFont.lazy was true,
        # so passing both arguments with a non-lazy font ran into the
        # assertion below even though the call was valid.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        data = self.data

        self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        uvsDict = {}
        recOffset = 0
        for n in range(self.numVarSelectorRecords):
            uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset + 11])
            recOffset += 11
            varUVS = cvtToUVS(uvs)
            if defOVSOffset:
                # Offsets count from the start of the subtable, while
                # 'data' starts after the 10-byte header.
                startOffset = defOVSOffset - 10
                numValues, = struct.unpack(">L", data[startOffset:startOffset + 4])
                startOffset += 4
                for r in range(numValues):
                    uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset + 4])
                    startOffset += 4
                    firstBaseUV = cvtToUVS(uv)
                    cnt = addtlCnt + 1
                    localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt)]
                    uvsDict.setdefault(varUVS, []).extend(localUVList)

            if nonDefUVSOffset:
                startOffset = nonDefUVSOffset - 10
                numRecs, = struct.unpack(">L", data[startOffset:startOffset + 4])
                startOffset += 4
                localUVList = []
                for r in range(numRecs):
                    uv, gid = struct.unpack(">3sH", data[startOffset:startOffset + 5])
                    startOffset += 5
                    uv = cvtToUVS(uv)
                    glyphName = self.ttFont.getGlyphName(gid)
                    localUVList.append([uv, glyphName])
                uvsDict.setdefault(varUVS, []).extend(localUVList)

        self.uvsDict = uvsDict

    def toXML(self, writer, ttFont):
        """Write one "map" element per entry, ordered by selector and entry."""
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("format", self.format),
                ("length", self.length),
                ("numVarSelectorRecords", self.numVarSelectorRecords),
                ])
        writer.newline()
        uvsDict = self.uvsDict
        for uvs in sorted(uvsDict.keys()):
            uvList = uvsDict[uvs]
            uvList.sort(cmpUVSListEntry)
            for uv, gname in uvList:
                if gname is None:
                    gname = "None"
                # I use the arg rather than the keyword syntax in order to preserve the attribute order.
                writer.simpletag("map", [("uvs", hex(uvs)), ("uv", hex(uv)), ("name", gname)])
                writer.newline()
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Read the header attributes and merge the "map" children into ``self.uvsDict``.

        A "name" attribute of "None" is stored as None (a default entry).
        Non-tuple content items and elements other than "map" are ignored.
        """
        self.format = safeEval(attrs["format"])
        self.length = safeEval(attrs["length"])
        self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
        self.language = 0xFF  # provide a value so that CmapSubtable.__cmp__() won't fail
        if not hasattr(self, "cmap"):
            self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        if not hasattr(self, "uvsDict"):
            self.uvsDict = {}
        uvsDict = self.uvsDict

        for element in content:
            if not isinstance(element, tuple):
                continue
            childName, childAttrs, childContent = element
            if childName != "map":
                continue
            uvs = safeEval(childAttrs["uvs"])
            uv = safeEval(childAttrs["uv"])
            gname = childAttrs["name"]
            if gname == "None":
                gname = None
            uvsDict.setdefault(uvs, []).append([uv, gname])

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If ``self.data`` is set, it is written back behind a freshly packed
        header; otherwise the subtable is rebuilt from ``self.uvsDict`` and
        ``self.length`` / ``self.numVarSelectorRecords`` are updated.
        """
        # NOTE(review): decompile() above does not clear self.data, so after
        # a decompile this raw-data path is taken and later edits to uvsDict
        # are not written out -- confirm whether that is intended.
        if self.data:
            return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        self.numVarSelectorRecords = len(uvsList)
        offset = 10 + self.numVarSelectorRecords*11  # current value is end of VarSelectorRecords block.
        data = []
        varSelectorRecords = []
        for uvs in uvsList:
            entryList = uvsDict[uvs]

            defList = [entry[0] for entry in entryList if entry[1] is None]
            if defList:
                defOVSOffset = offset
                defList.sort()

                # Collapse consecutive unicode values into
                # (start value, additional count) range records.
                lastUV = defList[0]
                cnt = -1
                defRecs = []
                for defEntry in defList:
                    cnt += 1
                    if (lastUV + cnt) != defEntry:
                        rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt - 1)
                        lastUV = defEntry
                        defRecs.append(rec)
                        cnt = 0

                rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
                defRecs.append(rec)

                numDefRecs = len(defRecs)
                data.append(struct.pack(">L", numDefRecs))
                data.extend(defRecs)
                offset += 4 + numDefRecs*4
            else:
                defOVSOffset = 0

            ndefList = [entry for entry in entryList if entry[1] is not None]
            if ndefList:
                nonDefUVSOffset = offset
                ndefList.sort()
                numNonDefRecs = len(ndefList)
                data.append(struct.pack(">L", numNonDefRecs))
                offset += 4 + numNonDefRecs*5

                for uv, gname in ndefList:
                    gid = ttFont.getGlyphID(gname)
                    ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
                    data.append(ndrec)
            else:
                nonDefUVSOffset = 0

            vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
            varSelectorRecords.append(vrec)

        data = "".join(varSelectorRecords) + "".join(data)
        self.length = 10 + len(data)
        headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)
        # Do not store the result in self.data: self.data holds only the
        # bytes after the header, so caching header + body there made the
        # next compile() emit the header twice.
        return headerdata + data
class cmap_format_unknown(CmapSubtable):
    """Fallback for cmap subtable formats without a dedicated class.

    The subtable is kept as an opaque string in ``self.data`` and is
    dumped to / read from XML as hex.
    """

    def toXML(self, writer, ttFont):
        """Write the raw subtable data as a hex dump."""
        # Replace the "unknown" part of the class name with the format
        # number to build the element name.
        cmapName = self.__class__.__name__[:12] + str(self.format)
        writer.begintag(cmapName, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ])
        writer.newline()
        writer.dumphex(self.data)
        writer.endtag(cmapName)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Restore the raw subtable data from its hex dump."""
        self.data = readHex(content)
        self.cmap = {}

    def decompileHeader(self, data, ttFont):
        """Keep the whole subtable, header included, in ``self.data``."""
        self.language = 0  # dummy value
        self.data = data

    def decompile(self, data, ttFont):
        """Store ``data`` when given; there is nothing else to decompile.

        Either both arguments are None or both must be provided.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            # 'data' already is this subtable's own data; the previous code
            # sliced it with undefined 'offset'/'length' names (NameError).
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    def compile(self, ttFont):
        """Return the stored raw data, or None when there is none."""
        if self.data:
            return self.data
        return None


# Maps a cmap subtable format number to the class implementing it; formats
# not listed here are handled by cmap_format_unknown.
cmap_classes = {
        0: cmap_format_0,
        2: cmap_format_2,
        4: cmap_format_4,
        6: cmap_format_6,
        12: cmap_format_12,
        13: cmap_format_13,
        14: cmap_format_14,
        }