_c_m_a_p.py revision d299b55d14fa77411140c0cc1c2524583b4ffa58
import DefaultTable
import struct
import array
import Numeric
import operator
from fontTools import ttLib
from fontTools.misc.textTools import safeEval, readHex
from types import TupleType


class table__c_m_a_p(DefaultTable.DefaultTable):
    """The 'cmap' table: a version number plus a list of encoding subtables.

    ``self.tables`` holds one subtable object per directory entry; each
    carries ``platformID`` and ``platEncID`` attributes.  ``cmap_classes``
    and ``cmap_format_unknown`` are module-level names that are expected to
    be defined elsewhere in this file.
    """

    def getcmap(self, platformID, platEncID):
        """Return the first subtable matching both IDs, or None if absent."""
        for subtable in self.tables:
            if (subtable.platformID == platformID and
                    subtable.platEncID == platEncID):
                return subtable
        return None  # not found

    def decompile(self, data, ttFont):
        """Parse the subtable directory and create one object per subtable.

        Only each subtable's header is decoded here; the remaining data is
        decoded lazily when an attribute of the subtable is first referenced.
        Directory entries pointing at the same offset share one ``cmap``.
        """
        tableVersion, numSubTables = struct.unpack(">HH", data[:4])
        self.tableVersion = int(tableVersion)
        self.tables = tables = []
        seenOffsets = {}  # subtable offset -> index into `tables`
        for i in range(numSubTables):
            platformID, platEncID, offset = struct.unpack(
                ">HHl", data[4 + i * 8:4 + (i + 1) * 8])
            platformID, platEncID = int(platformID), int(platEncID)
            fmt, length = struct.unpack(">HH", data[offset:offset + 4])
            if (fmt < 8) and not length:
                continue  # bogus cmap subtable?
            if fmt in [8, 10, 12]:
                # These formats have a 32-bit length after a reserved field.
                fmt, reserved, length = struct.unpack(
                    ">HHL", data[offset:offset + 8])
            if fmt not in cmap_classes:
                table = cmap_format_unknown(fmt)
            else:
                table = cmap_classes[fmt](fmt)
            table.platformID = platformID
            table.platEncID = platEncID
            # Note that by default we decompile only the subtable header info;
            # any other data gets decompiled only when an attribute of the
            # subtable is referenced.
            table.decompileHeader(data[offset:offset + int(length)], ttFont)
            if offset in seenOffsets:
                table.cmap = tables[seenOffsets[offset]].cmap
            else:
                # Record the position in `tables`, not the directory index:
                # the two differ as soon as a bogus subtable was skipped.
                seenOffsets[offset] = len(tables)
            tables.append(table)

    def compile(self, ttFont):
        """Return the binary table: header, directory, then subtable data.

        Subtables sharing a ``cmap`` object, or compiling to identical bytes,
        are written once and referenced by the same offset.
        """
        self.tables.sort()  # sort according to the spec; see CmapSubtable.__cmp__()
        numSubTables = len(self.tables)
        totalOffset = 4 + 8 * numSubTables
        data = struct.pack(">HH", self.tableVersion, numSubTables)
        tableData = ""
        seen = {}  # Some tables are the same object reference. Don't compile them twice.
        done = {}  # Some tables are different objects, but compile to the same data chunk
        for table in self.tables:
            try:
                offset = seen[id(table.cmap)]
            except KeyError:
                chunk = table.compile(ttFont)
                if chunk in done:
                    offset = done[chunk]
                else:
                    offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
                    tableData = tableData + chunk
            data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
        return data + tableData

    def toXML(self, writer, ttFont):
        """Write the table version followed by every subtable."""
        writer.simpletag("tableVersion", version=self.tableVersion)
        writer.newline()
        for table in self.tables:
            table.toXML(writer, ttFont)

    def fromXML(self, element, ttFont):
        """Consume one XML element given as a ``(name, attrs, content)`` tuple.

        ``tableVersion`` sets the version; ``cmap_format_<n>`` elements create
        and append a subtable; anything else is ignored.
        """
        name, attrs, content = element
        if name == "tableVersion":
            self.tableVersion = safeEval(attrs["version"])
            return
        if name[:12] != "cmap_format_":
            return
        if not hasattr(self, "tables"):
            self.tables = []
        fmt = safeEval(name[12:])
        if fmt not in cmap_classes:
            table = cmap_format_unknown(fmt)
        else:
            table = cmap_classes[fmt](fmt)
        table.platformID = safeEval(attrs["platformID"])
        table.platEncID = safeEval(attrs["platEncID"])
        table.fromXML((name, attrs, content), ttFont)
        self.tables.append(table)
class CmapSubtable:
    """Base class for cmap subtables with lazy decompilation.

    ``decompileHeader`` stores the undecoded remainder in ``self.data``.  The
    first access to an attribute that is not set yet (typically ``cmap``)
    triggers the subclass ``decompile`` and then clears ``self.data``.
    """

    def __init__(self, format):
        self.format = format
        self.data = None
        self.ttFont = None

    def __getattr__(self, attr):
        # allow lazy decompilation of subtables.
        if attr[:2] == '__':  # don't handle requests for member functions like '__lt__'
            raise AttributeError(attr)
        # Look in the instance dict directly: going through normal attribute
        # access would re-enter __getattr__ if 'data' itself is missing.
        if self.__dict__.get("data") is None:
            raise AttributeError(attr)
        self.decompile(None, None)  # use saved data.
        # Once this table has been decompiled, make sure we don't just return
        # the original data. Also avoids recursion when called with an
        # attribute that the cmap subtable doesn't have.
        self.data = None
        return getattr(self, attr)

    def decompileHeader(self, data, ttFont):
        """Decode the 6-byte header and keep the rest for lazy decompilation.

        Asserts that the length stored in the header equals ``len(data)``.
        """
        format, length, language = struct.unpack(">HHH", data[:6])
        assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
        self.format = int(format)
        self.length = int(length)
        self.language = int(language)
        self.data = data[6:]
        self.ttFont = ttFont

    def toXML(self, writer, ttFont):
        """Write this subtable as an element named after its class."""
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("language", self.language),
                ])
        writer.newline()
        codes = list(self.cmap.items())
        codes.sort()
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def _writeCodes(self, codes, writer):
        """Write one <map> tag per (code, name) pair.

        For platform 0 and for platform 3 with encoding 1 or 10, a comment
        looked up in ``fontTools.unicode.Unicode`` follows each tag.
        """
        if (self.platformID, self.platEncID) == (3, 1) or (self.platformID, self.platEncID) == (3, 10) or self.platformID == 0:
            from fontTools.unicode import Unicode
            isUnicode = 1
        else:
            isUnicode = 0
        for code, name in codes:
            writer.simpletag("map", code=hex(code), name=name)
            if isUnicode:
                writer.comment(Unicode[code])
            writer.newline()

    def __cmp__(self, other):
        # implemented so that list.sort() sorts according to the cmap spec.
        selfTuple = (
                    self.platformID,
                    self.platEncID,
                    self.language,
                    self.__dict__)
        otherTuple = (
                    other.platformID,
                    other.platEncID,
                    other.language,
                    other.__dict__)
        return cmp(selfTuple, otherTuple)
class cmap_format_0(CmapSubtable):
    """Format 0 subtable: 256 one-byte glyph IDs, 262 bytes in total."""

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (char code -> glyph name).

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None.  If not, someone is
        calling decompile() directly, and must provide both args; ``data``
        is then the complete subtable including its header.
        """
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        # decompileHeader assigns the data after the header to self.data
        assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
        glyphIdArray = array.array("B")
        glyphIdArray.fromstring(self.data)
        getGlyphName = self.ttFont.getGlyphName
        self.cmap = cmap = {}
        for charCode, glyphID in enumerate(glyphIdArray):
            cmap[charCode] = getGlyphName(glyphID)

    def compile(self, ttFont):
        """Return the 262-byte subtable.

        If undecoded data is still held it is written back unchanged.
        Otherwise ``self.cmap`` must map exactly the codes 0..255.
        """
        if self.data:
            return struct.pack(">HHH", 0, 262, self.language) + self.data

        charCodeList = list(self.cmap.items())
        charCodeList.sort()
        charCodes = [entry[0] for entry in charCodeList]
        names = [entry[1] for entry in charCodeList]
        assert charCodes == list(range(256))
        valueList = [ttFont.getGlyphID(name) for name in names]

        # Unsigned bytes, matching what decompile() reads; a signed 8-bit
        # array cannot hold glyph IDs 128..255.
        glyphIdArray = array.array("B", valueList)
        data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring()
        assert len(data) == 262
        return data

    def fromXML(self, element, ttFont):
        """Read ``language`` and every <map> child from a (name, attrs, content) tuple."""
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap
        for child in content:
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
# struct format of one format 2 subheader:
# firstCode, entryCount, idDelta (signed), idRangeOffset.
subHeaderFormat = ">HHhH"


class SubHeader:
    """Plain record for one cmap format 2 subheader.

    All scalar fields start as None; ``glyphIndexArray`` starts as a fresh
    empty list for every instance.
    """

    def __init__(self):
        self.firstCode = None
        self.entryCount = None
        self.idDelta = None
        self.idRangeOffset = None
        self.glyphIndexArray = []
class cmap_format_2(CmapSubtable):
    """Format 2 subtable: mixed one-byte / two-byte character codes."""

    def setIDDelta(self, subHeader):
        """Rebase the glyph indices of ``subHeader`` so the smallest is 1.

        The lowest non-zero gid in glyphIndexArray, after subtracting
        idDelta, must be 1, so that subranges are more likely to be shared.
        idDelta is stored as a signed short and the final glyph ID is
        reconstructed as (gid + idDelta) % 0x10000, so a delta above 0x7FFF
        is expressed as the equivalent negative value and the stored indices
        are reduced modulo 0x10000 to stay within an unsigned short.
        """
        subHeader.idDelta = 0
        # find the minGI which is not zero.
        nonZero = [gid for gid in subHeader.glyphIndexArray if gid != 0]
        if not nonZero:
            return
        minGI = min(nonZero)
        if minGI > 1:
            idDelta = minGI - 1
            if idDelta > 0x7FFF:
                idDelta = idDelta - 0x10000
            subHeader.idDelta = idDelta
            for i in range(subHeader.entryCount):
                gid = subHeader.glyphIndexArray[i]
                if gid > 0:
                    subHeader.glyphIndexArray[i] = (gid - idDelta) % 0x10000

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (char code -> glyph name).

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None.  If not, someone is
        calling decompile() directly, and must provide both args; ``data``
        is then the complete subtable including its header.
        """
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        # get the key array, and determine the number of subHeaders.
        allKeys = array.array("H")
        allKeys.fromstring(data[:512])
        data = data[512:]
        if ttLib.endian != "big":
            allKeys.byteswap()
        subHeaderKeys = [key // 8 for key in allKeys]
        maxSubHeaderindex = max(subHeaderKeys)

        # Load subHeaders
        subHeaderList = []
        pos = 0
        for i in range(maxSubHeaderindex + 1):
            subHeader = SubHeader()
            (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta,
                subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
            pos += 8
            giDataPos = pos + subHeader.idRangeOffset - 2
            giList = array.array("H")
            giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount * 2])
            if ttLib.endian != "big":
                giList.byteswap()
            subHeader.glyphIndexArray = giList
            subHeaderList.append(subHeader)

        # How this gets processed.
        # Charcodes may be one or two bytes.
        # The first byte of a charcode is mapped through the subHeaderKeys, to
        # select a subHeader. For any subheader but 0, the next byte is then
        # mapped through the selected subheader. If subheader index 0 is
        # selected, then the byte itself is mapped through the subheader, and
        # there is no second byte. Then assume that the subsequent byte is the
        # first byte of the next charcode, and repeat.
        #
        # Each subheader references a range in the glyphIndexArray whose
        # length is entryCount. The range referenced by a subheader may
        # overlap with the range referenced by another subheader. The only
        # subheader that will be referenced by more than one first-byte value
        # is the subheader that maps the entire range of glyphID values to
        # glyphIndex 0, e.g. notdef:
        #     {firstChar 0, EntryCount 0, idDelta 0, idRangeOffset xx}
        # A byte being mapped through a subheader is treated as an index into
        # a mapping of array index to font glyphIndex. A subheader specifies a
        # subrange within (0...256) by the firstChar and EntryCount values.
        # If the byte value is outside the subrange, then the glyphIndex is
        # zero (e.g. glyph not in font). If the byte index is in the subrange,
        # then an offset index is calculated as (byteIndex - firstChar). The
        # index to glyphIndex mapping is a subrange of the glyphIndexArray.
        # You find the start of the subrange by counting idRangeOffset bytes
        # from the idRangeOffset word. The first value in this subrange is the
        # glyphIndex for the index firstChar. The offset index should then be
        # used in this array to get the glyphIndex.
        # Example for Logocut-Medium:
        #     first byte of charcode = 129; selects subheader 1.
        #     subheader 1 = {firstChar 64, EntryCount 108, idDelta 42, idRangeOffset 0252}
        #     second byte of charCode = 66
        #     the index offset = 66-64 = 2.
        #     The subrange of the glyphIndexArray starting at 0x0252 bytes
        #     from the idRangeOffset word is:
        #     [glyphIndexArray index], [subrange array index] = glyphIndex
        #     [256], [0]=1  from charcode [129, 64]
        #     [257], [1]=2  from charcode [129, 65]
        #     [258], [2]=3  from charcode [129, 66]
        #     [259], [3]=4  from charcode [129, 67]
        # So, the glyphIndex = 3 from the array. Then if idDelta is not zero
        # and the glyph ID is not zero, add it to the glyphID to get the final
        # glyphIndex value. In this case the final glyph index = 3 + 42 -> 45.

        # Cleared to None, as the other subtable formats do, so that the base
        # class's lazy-decompile check treats this table as fully decoded.
        self.data = None
        self.cmap = cmap = {}
        for firstByte in range(256):
            subHeadindex = subHeaderKeys[firstByte]
            subHeader = subHeaderList[subHeadindex]
            if subHeadindex == 0:
                if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
                    continue  # gi is notdef.
                else:
                    charCode = firstByte
                    offsetIndex = firstByte - subHeader.firstCode
                    gi = subHeader.glyphIndexArray[offsetIndex]
                    if gi != 0:
                        gi = (gi + subHeader.idDelta) % 0x10000
                    else:
                        continue  # gi is notdef.
                cmap[charCode] = gi
            else:
                if subHeader.entryCount:
                    charCodeOffset = firstByte * 256 + subHeader.firstCode
                    for offsetIndex in range(subHeader.entryCount):
                        charCode = charCodeOffset + offsetIndex
                        gi = subHeader.glyphIndexArray[offsetIndex]
                        if gi != 0:
                            gi = (gi + subHeader.idDelta) % 0x10000
                        else:
                            continue
                        cmap[charCode] = gi
                # If not subHeader.entryCount, then all char codes with this
                # first byte are mapped to .notdef. We can skip this subtable,
                # and leave the glyphs un-encoded, which is the same as
                # mapping it to .notdef.

        # cmap values are GID's; replace them with glyph names.
        glyphOrder = self.ttFont.getGlyphOrder()
        charCodes = list(cmap.keys())
        gids = [cmap[charCode] for charCode in charCodes]
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        for charCode, name in zip(charCodes, names):
            cmap[charCode] = name

    def compile(self, ttFont):
        """Return the binary subtable built from ``self.cmap``.

        If undecoded data is still held it is written back unchanged.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        kEmptyTwoCharCodeRange = -1
        notdefGI = 0

        items = list(self.cmap.items())
        items.sort()
        charCodes = [item[0] for item in items]
        names = [item[1] for item in items]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            nameMap = ttFont.getReverseGlyphMap(rebuild=1)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 2 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if name[:3] == 'gid':
                                # NOTE(review): eval() on a glyph name; names
                                # may come from an XML dump, so this should
                                # probably be int() -- confirm before changing.
                                gid = eval(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            raise KeyError(name)
                    gids.append(gid)

        # Process the (char code to gid) item list in char code order.
        # By definition, all one byte char codes map to subheader 0.
        # For all the two byte char codes, we assume that the first byte maps
        # to the empty subhead (with an entry count of 0, which defines all
        # char codes in its range to map to notdef) unless proven otherwise.
        # Note that since the char code items are processed in char code
        # order, all the char codes with the same first byte are in
        # sequential order.

        subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)]  # list of indices into subHeaderList.
        subHeaderList = []

        # We force this subheader entry 0 to exist in the subHeaderList in the
        # case where some one comes up with a cmap where all the one byte char
        # codes map to notdef, with the result that the subhead 0 would not
        # get created just by processing the item list.
        charCode = charCodes[0]
        if charCode > 255:
            subHeader = SubHeader()
            subHeader.firstCode = 0
            subHeader.entryCount = 0
            subHeader.idDelta = 0
            subHeader.idRangeOffset = 0
            subHeaderList.append(subHeader)

        lastFirstByte = -1
        for charCode, gid in zip(charCodes, gids):
            if gid == 0:
                continue
            firstbyte = charCode >> 8
            secondByte = charCode & 0x00FF

            if firstbyte != lastFirstByte:  # Need to update the current subhead, and start a new one.
                if lastFirstByte > -1:
                    # fix GI's and iDelta of current subheader.
                    self.setIDDelta(subHeader)

                    # If it was subheader 0 for one-byte charCodes, then we
                    # need to set the subHeaderKeys value to zero for the
                    # indices matching the char codes.
                    # NOTE(review): this only runs when a later first byte
                    # follows; a cmap with one-byte codes only never reaches
                    # it -- confirm whether that case needs handling.
                    if lastFirstByte == 0:
                        for index in range(subHeader.entryCount):
                            subHeaderKeys[subHeader.firstCode + index] = 0

                    assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
                # init new subheader
                subHeader = SubHeader()
                subHeader.firstCode = secondByte
                subHeader.entryCount = 1
                subHeader.glyphIndexArray.append(gid)
                subHeaderList.append(subHeader)
                subHeaderKeys[firstbyte] = len(subHeaderList) - 1
                lastFirstByte = firstbyte
            else:
                # need to fill in with notdefs all the code points between the
                # last charCode and the current charCode.
                codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
                for i in range(codeDiff):
                    subHeader.glyphIndexArray.append(notdefGI)
                subHeader.glyphIndexArray.append(gid)
                subHeader.entryCount = subHeader.entryCount + codeDiff + 1

        # fix GI's and iDelta of last subheader that we added to the subheader array.
        self.setIDDelta(subHeader)

        # Now we add a final subheader for the subHeaderKeys which maps to
        # empty two byte charcode ranges.
        subHeader = SubHeader()
        subHeader.firstCode = 0
        subHeader.entryCount = 0
        subHeader.idDelta = 0
        subHeader.idRangeOffset = 2
        subHeaderList.append(subHeader)
        emptySubheadIndex = len(subHeaderList) - 1
        for index in range(256):
            if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
                subHeaderKeys[index] = emptySubheadIndex
        # Since this is the last subheader, the GlyphIndex Array starts two
        # bytes after the start of the idRangeOffset word of this subHeader.
        # We can safely point to the first entry in the GlyphIndexArray, since
        # the first subrange of the GlyphIndexArray is for subHeader 0, which
        # always starts with charcode 0 and GID 0.

        idRangeOffset = (len(subHeaderList) - 1) * 8 + 2  # offset to beginning of glyphIDArray from first subheader idRangeOffset.
        subheadRangeLen = len(subHeaderList) - 1  # skip last special empty-set subheader; its idRangeOffset is already hard-coded to 2.
        for index in range(subheadRangeLen):
            subHeader = subHeaderList[index]
            subHeader.idRangeOffset = 0
            for j in range(index):
                prevSubhead = subHeaderList[j]
                if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray:  # use the glyphIndexArray subarray
                    subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index - j) * 8
                    subHeader.glyphIndexArray = []
                    break
            if subHeader.idRangeOffset == 0:  # didn't find one.
                subHeader.idRangeOffset = idRangeOffset
                idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount * 2  # one less subheader, one more subArray.
            else:
                idRangeOffset = idRangeOffset - 8  # one less subheader

        # Now we can write out the data!
        length = 6 + 512 + 8 * len(subHeaderList)  # header, 256 subHeaderKeys, and subheader array.
        for subhead in subHeaderList[:-1]:
            # We can't use subhead.entryCount, as some of the subheads may share subArrays.
            length = length + len(subhead.glyphIndexArray) * 2
        dataList = [struct.pack(">HHH", 2, length, self.language)]
        for index in subHeaderKeys:
            dataList.append(struct.pack(">H", index * 8))
        for subhead in subHeaderList:
            dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
        for subhead in subHeaderList[:-1]:
            for gi in subhead.glyphIndexArray:
                dataList.append(struct.pack(">H", gi))
        data = "".join(dataList)
        assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data)) + " calc : " + str(length)
        return data

    def fromXML(self, element, ttFont):
        """Read ``language`` and every <map> child from a (name, attrs, content) tuple."""
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for child in content:
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
cmap_format_4_format = ">7H"

#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array


def splitRange(startCode, endCode, cmap):
    """Split one run of consecutive char codes into cmap4 segments.

    Try to split a range of character codes into subranges with consecutive
    glyph IDs in such a way that the cmap4 subtable can be stored "most"
    efficiently.  (The original author notes this is not proven optimal.)

    ``cmap`` maps every code in startCode..endCode to a glyph ID.  Returns
    ``(start, end)``: the end codes of all resulting segments, and the start
    codes of all but the first segment (so ``len(start) + 1 == len(end)``).
    """
    if startCode == endCode:
        return [], [endCode]

    lastID = cmap[startCode]
    lastCode = startCode
    inOrder = None
    orderedBegin = None
    subRanges = []

    # Gather subranges in which the glyph IDs are consecutive.
    for code in range(startCode + 1, endCode + 1):
        glyphID = cmap[code]

        if glyphID - 1 == lastID:
            if inOrder is None or not inOrder:
                inOrder = 1
                orderedBegin = lastCode
        else:
            if inOrder:
                inOrder = 0
                subRanges.append((orderedBegin, lastCode))
                orderedBegin = None

        lastID = glyphID
        lastCode = code

    if inOrder:
        subRanges.append((orderedBegin, lastCode))
    assert lastCode == endCode

    # Now filter out those new subranges that would only make the data bigger.
    # A new segment cost 8 bytes, not using a new segment costs 2 bytes per
    # character.
    newRanges = []
    for b, e in subRanges:
        if b == startCode and e == endCode:
            break  # the whole range, we're fine
        if b == startCode or e == endCode:
            threshold = 4  # split costs one more segment
        else:
            threshold = 8  # split costs two more segments
        if (e - b + 1) > threshold:
            newRanges.append((b, e))
    subRanges = newRanges

    if not subRanges:
        return [], [endCode]

    if subRanges[0][0] != startCode:
        subRanges.insert(0, (startCode, subRanges[0][0] - 1))
    if subRanges[-1][1] != endCode:
        subRanges.append((subRanges[-1][1] + 1, endCode))

    # Fill the "holes" in the segments list -- those are the segments in which
    # the glyph IDs are _not_ consecutive.
    i = 1
    while i < len(subRanges):
        if subRanges[i - 1][1] + 1 != subRanges[i][0]:
            subRanges.insert(i, (subRanges[i - 1][1] + 1, subRanges[i][0] - 1))
            i = i + 1
        i = i + 1

    # Transform the ranges into startCode/endCode lists.
    start = []
    end = []
    for b, e in subRanges:
        start.append(b)
        end.append(e)
    start.pop(0)

    assert len(start) + 1 == len(end)
    return start, end
class cmap_format_4(CmapSubtable):
    """Format 4 subtable: segment mapping to delta values (2-byte codes)."""

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (char code -> glyph name).

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None.  If not, someone is
        calling decompile() directly, and must provide both args; ``data``
        is then the complete subtable including its header.
        """
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        (segCountX2, searchRange, entrySelector, rangeShift) = \
            struct.unpack(">4H", data[:8])
        data = data[8:]
        segCount = segCountX2 // 2

        allCodes = array.array("H")
        allCodes.fromstring(data)
        self.data = data = None

        if ttLib.endian != "big":
            allCodes.byteswap()

        # divide the data
        endCode = allCodes[:segCount]
        allCodes = allCodes[segCount + 1:]  # the +1 is skipping the reservedPad field
        startCode = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idDelta = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idRangeOffset = allCodes[:segCount]
        glyphIndexArray = allCodes[segCount:]
        lenGIArray = len(glyphIndexArray)

        # build 2-byte character mapping
        charCodes = []
        gids = []
        for i in range(len(startCode) - 1):  # don't do 0xffff!
            rangeCharCodes = range(startCode[i], endCode[i] + 1)
            charCodes.extend(rangeCharCodes)
            for charCode in rangeCharCodes:
                rangeOffset = idRangeOffset[i]
                if rangeOffset == 0:
                    glyphID = charCode + idDelta[i]
                else:
                    # idRangeOffset is a byte offset counted from its own
                    # position in the idRangeOffset array; convert it to an
                    # index into glyphIndexArray.
                    index = idRangeOffset[i] // 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
                    assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray)
                    if glyphIndexArray[index] != 0:  # if not missing glyph
                        glyphID = glyphIndexArray[index] + idDelta[i]
                    else:
                        glyphID = 0  # missing glyph
                gids.append(glyphID % 0x10000)

        self.cmap = cmap = {}
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        for charCode, name in zip(charCodes, names):
            cmap[charCode] = name

    def setIDDelta(self, idDelta):
        """Fold ``idDelta`` into the signed 16-bit range and return it.

        idDelta is a short, and must be between -32K and 32K-1.  startCode
        can be between 0 and 64K-1, and the first glyph index can be between
        1 and 64K-1, so the raw difference can lie anywhere between -(64K-2)
        and 64K-1.  Since the final glyph index is reconstructed as
        (gid + idDelta) % 0x10000, adding or subtracting 0x10000 yields an
        equivalent delta that fits.
        """
        if idDelta > 0x7FFF:
            idDelta = idDelta - 0x10000
        elif idDelta < -0x8000:
            # -0x8000 itself is a valid short and must be left alone.
            idDelta = idDelta + 0x10000

        return idDelta

    def compile(self, ttFont):
        """Return the binary subtable built from ``self.cmap``.

        If undecoded data is still held it is written back unchanged.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data

        from fontTools.ttLib.sfnt import maxPowerOfTwo

        charCodes = list(self.cmap.keys())
        charCodes.sort()
        lenCharCodes = len(charCodes)
        if lenCharCodes == 0:
            startCode = [0xffff]
            endCode = [0xffff]
        else:
            # Look the names up through the sorted codes so that names[i]
            # really belongs to charCodes[i].
            names = [self.cmap[charCode] for charCode in charCodes]
            nameMap = ttFont.getReverseGlyphMap()
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                nameMap = ttFont.getReverseGlyphMap(rebuild=1)
                try:
                    gids = [nameMap[name] for name in names]
                except KeyError:
                    # allow virtual GIDs in format 4 tables
                    gids = []
                    for name in names:
                        try:
                            gid = nameMap[name]
                        except KeyError:
                            try:
                                if name[:3] == 'gid':
                                    # NOTE(review): eval() on a glyph name;
                                    # names may come from an XML dump, so this
                                    # should probably be int() -- confirm
                                    # before changing.
                                    gid = eval(name[3:])
                                else:
                                    gid = ttFont.getGlyphID(name)
                            except Exception:
                                raise KeyError(name)
                        gids.append(gid)
            cmap = {}  # code:glyphID mapping
            for charCode, gid in zip(charCodes, gids):
                cmap[charCode] = gid

            # Build startCode and endCode lists.
            # Split the char codes in ranges of consecutive char codes, then
            # split each range in more ranges of consecutive/not consecutive
            # glyph IDs. See splitRange().
            lastCode = charCodes[0]
            endCode = []
            startCode = [lastCode]
            for charCode in charCodes[1:]:  # skip the first code, it's the first start code
                if charCode == lastCode + 1:
                    lastCode = charCode
                    continue
                start, end = splitRange(startCode[-1], lastCode, cmap)
                startCode.extend(start)
                endCode.extend(end)
                startCode.append(charCode)
                lastCode = charCode
            endCode.append(lastCode)
            startCode.append(0xffff)
            endCode.append(0xffff)

        # build up the per-segment idDelta / idRangeOffset / glyph index data
        idDelta = []
        idRangeOffset = []
        glyphIndexArray = []
        for i in range(len(endCode) - 1):  # skip the closing codes (0xffff)
            indices = []
            for charCode in range(startCode[i], endCode[i] + 1):
                indices.append(cmap[charCode])
            if indices == list(range(indices[0], indices[0] + len(indices))):
                # Consecutive glyph IDs: a single delta describes the segment.
                idDelta.append(self.setIDDelta(indices[0] - startCode[i]))
                idRangeOffset.append(0)
            else:
                # Otherwise store the glyph IDs explicitly; the offset is in
                # bytes, relative to this segment's idRangeOffset entry.
                idDelta.append(0)
                idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
                glyphIndexArray.extend(indices)
        idDelta.append(1)  # 0xffff + 1 == 0. So this end code maps to .notdef
        idRangeOffset.append(0)

        # binary-search helper fields of the header
        segCount = len(endCode)
        segCountX2 = segCount * 2
        maxExponent = maxPowerOfTwo(segCount)
        searchRange = 2 * (2 ** maxExponent)
        entrySelector = maxExponent
        rangeShift = 2 * segCount - searchRange

        charCodeArray = Numeric.array(endCode + [0] + startCode, Numeric.UInt16)
        idDeltaeArray = Numeric.array(idDelta, Numeric.Int16)
        restArray = Numeric.array(idRangeOffset + glyphIndexArray, Numeric.UInt16)
        if ttLib.endian != "big":
            charCodeArray = charCodeArray.byteswapped()
            idDeltaeArray = idDeltaeArray.byteswapped()
            restArray = restArray.byteswapped()
        data = charCodeArray.tostring() + idDeltaeArray.tostring() + restArray.tostring()

        length = struct.calcsize(cmap_format_4_format) + len(data)
        header = struct.pack(cmap_format_4_format, self.format, length, self.language,
                segCountX2, searchRange, entrySelector, rangeShift)
        return header + data

    def fromXML(self, element, ttFont):
        """Read ``language`` and every <map> child from a (name, attrs, content) tuple.

        Unlike the other formats, any tuple child that is not a <map>
        element fails an assertion instead of being skipped.
        """
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for child in content:
            if not isinstance(child, tuple):
                continue
            nameMap, attrsMap, dummyContent = child
            if nameMap != "map":
                assert 0, "Unrecognized keyword in cmap subtable"
            cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
class cmap_format_6(CmapSubtable):
    """Format 6 subtable: one contiguous run of 2-byte char codes."""

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` (char code -> glyph name).

        We usually get here indirectly from the subtable __getattr__
        function, in which case both args must be None.  If not, someone is
        calling decompile() directly, and must provide both args; ``data``
        is then the complete subtable including its header.
        """
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        firstCode, entryCount = struct.unpack(">HH", data[:4])
        firstCode = int(firstCode)
        data = data[4:]
        #assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
        glyphIndexArray = array.array("H")
        glyphIndexArray.fromstring(data[:2 * int(entryCount)])
        if ttLib.endian != "big":
            glyphIndexArray.byteswap()
        self.data = data = None

        self.cmap = cmap = {}

        lenArray = len(glyphIndexArray)
        charCodes = range(firstCode, firstCode + lenArray)
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in glyphIndexArray]
        except IndexError:
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in glyphIndexArray]
        for charCode, name in zip(charCodes, names):
            cmap[charCode] = name

    def compile(self, ttFont):
        """Return the binary subtable built from ``self.cmap``.

        If undecoded data is still held it is written back unchanged.
        Otherwise the mapped codes must form one contiguous run; an empty
        cmap produces a header with firstCode 0 and no entries.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        cmap = self.cmap
        codes = list(cmap.keys())
        if codes:  # yes, there are empty cmap tables.
            codes.sort()
            lenCodes = len(codes)
            assert codes == list(range(codes[0], codes[0] + lenCodes))
            firstCode = codes[0]
            valueList = [ttFont.getGlyphID(cmap[code]) for code in codes]
            glyphIndexArray = Numeric.array(valueList, Numeric.UInt16)
            if ttLib.endian != "big":
                glyphIndexArray = glyphIndexArray.byteswapped()
            data = glyphIndexArray.tostring()
        else:
            data = ""
            firstCode = 0
        header = struct.pack(">HHHHH",
                6, len(data) + 10, self.language, firstCode, len(codes))
        return header + data

    def fromXML(self, element, ttFont):
        """Read ``language`` and every <map> child from a (name, attrs, content) tuple."""
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for child in content:
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
866 codes.sort() 867 lenCodes = len(codes) 868 assert codes == range(codes[0], codes[0] + lenCodes) 869 firstCode = codes[0] 870 valueList = map(operator.getitem, [cmap]*lenCodes, codes) 871 valueList = map(ttFont.getGlyphID, valueList) 872 glyphIndexArray = Numeric.array(valueList, Numeric.UInt16) 873 if ttLib.endian <> "big": 874 glyphIndexArray = glyphIndexArray.byteswapped() 875 data = glyphIndexArray.tostring() 876 else: 877 data = "" 878 firstCode = 0 879 header = struct.pack(">HHHHH", 880 6, len(data) + 10, self.language, firstCode, len(codes)) 881 return header + data 882 883 def fromXML(self, (name, attrs, content), ttFont): 884 self.language = safeEval(attrs["language"]) 885 if not hasattr(self, "cmap"): 886 self.cmap = {} 887 cmap = self.cmap 888 889 for element in content: 890 if type(element) <> TupleType: 891 continue 892 name, attrs, content = element 893 if name <> "map": 894 continue 895 cmap[safeEval(attrs["code"])] = attrs["name"] 896 897 898class cmap_format_12(CmapSubtable): 899 900 def __init__(self, format): 901 self.format = format 902 self.reserved = 0 903 self.data = None 904 self.ttFont = None 905 906 def decompileHeader(self, data, ttFont): 907 format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16]) 908 assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format 12 (data length: %d, header length: %d)" % (len(data), length) 909 self.format = format 910 self.reserved = reserved 911 self.length = length 912 self.language = language 913 self.nGroups = nGroups 914 self.data = data[16:] 915 self.ttFont = ttFont 916 917 def decompile(self, data, ttFont): 918 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 919 # If not, someone is calling the subtable decompile() directly, and must provide both args. 
920 if data != None and ttFont != None: 921 self.decompileHeader(data[offset:offset+int(length)], ttFont) 922 else: 923 assert( (data == None and (ttFont == None), "Need both data and ttFont arguments")) 924 925 data = self.data # decompileHeader assigns the data after the header to self.data 926 charCodes = [] 927 gids = [] 928 pos = 0 929 for i in range(self.nGroups): 930 startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] ) 931 pos += 12 932 lenGroup = 1 + endCharCode - startCharCode 933 charCodes += range(startCharCode, endCharCode +1) 934 gids += range(glyphID, glyphID + lenGroup) 935 self.data = data = None 936 self.cmap = cmap = {} 937 lenCmap = len(gids) 938 glyphOrder = self.ttFont.getGlyphOrder() 939 try: 940 names = map(operator.getitem, [glyphOrder]*lenCmap, gids ) 941 except IndexError: 942 getGlyphName = self.ttFont.getGlyphName 943 names = map(getGlyphName, gids ) 944 map(operator.setitem, [cmap]*lenCmap, charCodes, names) 945 946 def compile(self, ttFont): 947 if self.data: 948 return struct.pack(">HHLLL", self.format, self.reserved , self.length, self.language, self.nGroups) + self.data 949 charCodes = self.cmap.keys() 950 lenCharCodes = len(charCodes) 951 names = self.cmap.values() 952 nameMap = ttFont.getReverseGlyphMap() 953 try: 954 gids = map(operator.getitem, [nameMap]*lenCharCodes, names) 955 except KeyError: 956 nameMap = ttFont.getReverseGlyphMap(rebuild=1) 957 try: 958 gids = map(operator.getitem, [nameMap]*lenCharCodes, names) 959 except KeyError: 960 # allow virtual GIDs in format 12 tables 961 gids = [] 962 for name in names: 963 try: 964 gid = nameMap[name] 965 except KeyError: 966 try: 967 if (name[:3] == 'gid'): 968 gid = eval(name[3:]) 969 else: 970 gid = ttFont.getGlyphID(name) 971 except: 972 raise KeyError(name) 973 974 gids.append(gid) 975 976 cmap = {} # code:glyphID mapping 977 map(operator.setitem, [cmap]*len(charCodes), charCodes, gids) 978 979 charCodes.sort() 980 index = 0 981 startCharCode = 
charCodes[0] 982 startGlyphID = cmap[startCharCode] 983 lastGlyphID = startGlyphID - 1 984 lastCharCode = startCharCode - 1 985 nGroups = 0 986 dataList = [] 987 maxIndex = len(charCodes) 988 for index in range(maxIndex): 989 charCode = charCodes[index] 990 glyphID = cmap[charCode] 991 if (glyphID != 1 + lastGlyphID) or (charCode != 1 + lastCharCode): 992 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 993 startCharCode = charCode 994 startGlyphID = glyphID 995 nGroups = nGroups + 1 996 lastGlyphID = glyphID 997 lastCharCode = charCode 998 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 999 nGroups = nGroups + 1 1000 data = "".join(dataList) 1001 lengthSubtable = len(data) +16 1002 assert len(data) == (nGroups*12) == (lengthSubtable-16) 1003 return struct.pack(">HHLLL", self.format, self.reserved , lengthSubtable, self.language, nGroups) + data 1004 1005 def toXML(self, writer, ttFont): 1006 writer.begintag(self.__class__.__name__, [ 1007 ("platformID", self.platformID), 1008 ("platEncID", self.platEncID), 1009 ("format", self.format), 1010 ("reserved", self.reserved), 1011 ("length", self.length), 1012 ("language", self.language), 1013 ("nGroups", self.nGroups), 1014 ]) 1015 writer.newline() 1016 codes = self.cmap.items() 1017 codes.sort() 1018 self._writeCodes(codes, writer) 1019 writer.endtag(self.__class__.__name__) 1020 writer.newline() 1021 1022 def fromXML(self, (name, attrs, content), ttFont): 1023 self.format = safeEval(attrs["format"]) 1024 self.reserved = safeEval(attrs["reserved"]) 1025 self.length = safeEval(attrs["length"]) 1026 self.language = safeEval(attrs["language"]) 1027 self.nGroups = safeEval(attrs["nGroups"]) 1028 if not hasattr(self, "cmap"): 1029 self.cmap = {} 1030 cmap = self.cmap 1031 1032 for element in content: 1033 if type(element) <> TupleType: 1034 continue 1035 name, attrs, content = element 1036 if name <> "map": 1037 continue 1038 cmap[safeEval(attrs["code"])] = 
attrs["name"] 1039 1040 1041class cmap_format_unknown(CmapSubtable): 1042 1043 def toXML(self, writer, ttFont): 1044 cmapName = self.__class__.__name__[:12] + str(self.format) 1045 writer.begintag(cmapName, [ 1046 ("platformID", self.platformID), 1047 ("platEncID", self.platEncID), 1048 ]) 1049 writer.newline() 1050 writer.dumphex(self.data) 1051 writer.endtag(cmapName) 1052 writer.newline() 1053 1054 def fromXML(self, (name, attrs, content), ttFont): 1055 self.data = readHex(content) 1056 self.cmap = {} 1057 1058 def decompileHeader(self, data, ttFont): 1059 self.language = 0 # dummy value 1060 self.data = data 1061 1062 def decompile(self, data, ttFont): 1063 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 1064 # If not, someone is calling the subtable decompile() directly, and must provide both args. 1065 if data != None and ttFont != None: 1066 self.decompileHeader(data[offset:offset+int(length)], ttFont) 1067 else: 1068 assert( (data == None and (ttFont == None), "Need both data and ttFont arguments")) 1069 1070 def compile(self, ttFont): 1071 if self.data: 1072 return self.data 1073 else: 1074 return None 1075 1076cmap_classes = { 1077 0: cmap_format_0, 1078 2: cmap_format_2, 1079 4: cmap_format_4, 1080 6: cmap_format_6, 1081 12: cmap_format_12, 1082 } 1083