# _c_m_a_p.py revision bafa66e665afa581b58391585f1792578a4d3d2d
import DefaultTable
import struct
import array
from fontTools import ttLib
from fontTools.misc.textTools import safeEval, readHex
from types import TupleType


def _newSubtable(format):
    """Return a new subtable object for cmap subtable *format*.

    Formats without an entry in ``cmap_classes`` get a
    ``cmap_format_unknown`` instance, which keeps the raw bytes.
    """
    return cmap_classes.get(format, cmap_format_unknown)(format)


class table__c_m_a_p(DefaultTable.DefaultTable):
    """The 'cmap' table: a list of subtables, one per (platformID,
    platEncID) pair, kept in ``self.tables``."""

    def getcmap(self, platformID, platEncID):
        """Return the first subtable matching both IDs, or None."""
        for subtable in self.tables:
            if (subtable.platformID == platformID and
                    subtable.platEncID == platEncID):
                return subtable
        return None  # not found

    def decompile(self, data, ttFont):
        """Parse the binary table *data* into ``self.tables``."""
        tableVersion, numSubTables = struct.unpack(">HH", data[:4])
        self.tableVersion = int(tableVersion)
        self.tables = tables = []
        for i in range(numSubTables):
            # Subtable offsets are unsigned 32-bit values.
            platformID, platEncID, offset = struct.unpack(
                ">HHL", data[4 + i * 8:4 + (i + 1) * 8])
            platformID, platEncID = int(platformID), int(platEncID)
            format, length = struct.unpack(">HH", data[offset:offset + 4])
            if (format < 8) and not length:
                continue  # bogus cmap subtable?
            if format in (8, 10, 12, 13):
                # uint16 format, uint16 reserved, uint32 length
                format, reserved, length = struct.unpack(
                    ">HHL", data[offset:offset + 8])
            elif format == 14:
                # uint16 format, uint32 length (no reserved field)
                format, length = struct.unpack(">HL", data[offset:offset + 6])
            table = _newSubtable(format)
            table.platformID = platformID
            table.platEncID = platEncID
            table.decompile(data[offset:offset + int(length)], ttFont)
            tables.append(table)

    def compile(self, ttFont):
        """Return the binary form of the table.

        Subtables that compile to identical bytes are stored once and
        shared by several encoding records.
        """
        # sort according to the spec; see CmapSubtable.__cmp__()
        self.tables.sort()
        numSubTables = len(self.tables)
        nextOffset = 4 + 8 * numSubTables
        header = [struct.pack(">HH", self.tableVersion, numSubTables)]
        chunks = []
        done = {}  # remember the data so we can reuse the "pointers"
        for table in self.tables:
            chunk = table.compile(ttFont)
            if chunk in done:
                offset = done[chunk]
            else:
                offset = done[chunk] = nextOffset
                chunks.append(chunk)
                nextOffset = nextOffset + len(chunk)
            header.append(struct.pack(
                ">HHL", table.platformID, table.platEncID, offset))
        return "".join(header) + "".join(chunks)

    def toXML(self, writer, ttFont):
        """Write the table version followed by every subtable."""
        writer.simpletag("tableVersion", version=self.tableVersion)
        writer.newline()
        for table in self.tables:
            table.toXML(writer, ttFont)

    def fromXML(self, element, ttFont):
        """Consume one child element: a (name, attrs, content) tuple."""
        name, attrs, content = element
        if name == "tableVersion":
            self.tableVersion = safeEval(attrs["version"])
            return
        if name[:12] != "cmap_format_":
            return
        if not hasattr(self, "tables"):
            self.tables = []
        # Use the whole suffix: "cmap_format_12" must yield 12, not 1.
        try:
            format = int(name[12:])
        except ValueError:
            # e.g. "cmap_format_unknown": the tag does not carry the
            # number, the hex-dumped subtable data does.
            format = None
        table = _newSubtable(format)
        table.platformID = safeEval(attrs["platformID"])
        table.platEncID = safeEval(attrs["platEncID"])
        table.fromXML((name, attrs, content), ttFont)
        self.tables.append(table)


class CmapSubtable:
    """Base class for cmap subtables.

    The default XML form is a hex dump of the compiled subtable;
    subclasses that decode a ``cmap`` dict override toXML/fromXML.
    """

    def __init__(self, format):
        self.format = format

    def toXML(self, writer, ttFont):
        """Dump the compiled subtable as hex."""
        writer.begintag(self.__class__.__name__, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
        ])
        writer.newline()
        writer.dumphex(self.compile(ttFont))
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Rebuild the subtable from a hex dump element."""
        name, attrs, content = element
        self.decompile(readHex(content), ttFont)

    def _sortVersion(self):
        # Not every subtable has a 'version': format 12 stores
        # 'language' instead and unknown formats store neither.
        return getattr(self, "version", getattr(self, "language", 0))

    def __cmp__(self, other):
        # implemented so that list.sort() sorts according to the cmap spec.
        selfTuple = (
            self.platformID,
            self.platEncID,
            self._sortVersion(),
            self.__dict__)
        otherTuple = (
            other.platformID,
            other.platEncID,
            other._sortVersion(),
            other.__dict__)
        return cmp(selfTuple, otherTuple)

    def _mapToXML(self, writer, extraAttrs):
        """Write self.cmap as sorted <map code=... name=...> elements.

        *extraAttrs* is a list of (name, value) pairs written on the
        opening tag after platformID and platEncID.
        """
        writer.begintag(self.__class__.__name__, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
        ] + extraAttrs)
        writer.newline()
        for code, name in sorted(self.cmap.items()):
            writer.simpletag("map", code=hex(code), name=name)
            writer.newline()
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def _mapFromXML(self, content):
        """Fill self.cmap from the <map> elements found in *content*."""
        self.cmap = {}
        for element in content:
            if not isinstance(element, TupleType):
                continue  # skip the text between elements
            name, attrs, dummy = element
            if name != "map":
                continue
            self.cmap[safeEval(attrs["code"])] = attrs["name"]


class cmap_format_0(CmapSubtable):
    """Byte encoding table: exactly 256 one-byte glyph indices."""

    def decompile(self, data, ttFont):
        format, length, version = struct.unpack(">HHH", data[:6])
        self.version = int(version)
        assert len(data) == 262 == length
        glyphIdArray = array.array("B")
        glyphIdArray.fromstring(data[6:])
        self.cmap = cmap = {}
        for charCode in range(len(glyphIdArray)):
            cmap[charCode] = ttFont.getGlyphName(glyphIdArray[charCode])

    def compile(self, ttFont):
        charCodes = sorted(self.cmap.keys())
        assert charCodes == list(range(256))  # every byte value is mapped
        glyphIDs = [ttFont.getGlyphID(self.cmap[charCode])
                    for charCode in charCodes]
        glyphIdArray = array.array("B", glyphIDs)
        data = (struct.pack(">HHH", 0, 262, self.version)
                + glyphIdArray.tostring())
        assert len(data) == 262
        return data

    def toXML(self, writer, ttFont):
        self._mapToXML(writer, [("version", self.version)])

    def fromXML(self, element, ttFont):
        name, attrs, content = element
        self.version = safeEval(attrs["version"])
        self._mapFromXML(content)


subHeaderFormat = ">HHhH"


class SubHeader:
    """One subheader of a format 2 subtable, plus its glyph index range."""

    def __init__(self):
        self.firstCode = None
        self.entryCount = None
        self.idDelta = None
        self.idRangeOffset = None
        self.glyphIndexArray = []


def _setSubHeaderIdDelta(subHeader):
    """Move the smallest glyph ID of *subHeader* into its idDelta.

    When every stored glyph index is at least 2, the indices are rebased
    so the smallest becomes 1 and the difference goes into idDelta.
    Otherwise idDelta is 0 and the indices are left alone.
    """
    subHeader.idDelta = 0
    if subHeader.entryCount > 0:
        minGI = min(subHeader.glyphIndexArray) - 1
        if minGI > 0:
            subHeader.idDelta = minGI
            subHeader.glyphIndexArray = [
                gi - minGI for gi in subHeader.glyphIndexArray]


class cmap_format_2(CmapSubtable):
    """High-byte mapping through table, for mixed 8/16-bit encodings."""

    def decompile(self, data, ttFont):
        format, length, version = struct.unpack(">HHH", data[:6])
        self.version = int(version)
        data = data[6:]

        # get the key array, and determine the number of subHeaders.
        subHeaderKeys = [
            int(key) // 8 for key in struct.unpack(">256H", data[:512])]
        data = data[512:]
        maxSubHeaderindex = max(subHeaderKeys)

        # Load subHeaders
        subHeaderList = []
        for i in range(maxSubHeaderindex + 1):
            subHeader = SubHeader()
            (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta,
                subHeader.idRangeOffset) = struct.unpack(
                    subHeaderFormat, data[:8])
            data = data[8:]
            # idRangeOffset counts from its own word, which started two
            # bytes before what is now the beginning of 'data'.
            giData = data[subHeader.idRangeOffset - 2:]
            count = subHeader.entryCount
            subHeader.glyphIndexArray = [
                int(gi) for gi in
                struct.unpack(">%dH" % count, giData[:2 * count])]
            subHeaderList.append(subHeader)

        # How this gets processed.
        # Charcodes may be one or two bytes.
        # The first byte of a charcode is mapped through the subHeaderKeys,
        # to select a subHeader. For any subheader but 0, the next byte is
        # then mapped through the selected subheader. If subheader index 0
        # is selected, then the byte itself is mapped through the
        # subheader, and there is no second byte.
        # Then assume that the subsequent byte is the first byte of the
        # next charcode, and repeat.
        #
        # Each subheader references a range in the glyphIndexArray whose
        # length is entryCount. The range referenced by a subheader may
        # overlap with the range referenced by another subheader.
        # The only subheader that will be referenced by more than one
        # first-byte value is the subheader that maps the entire range of
        # glyphID values to glyphIndex 0, e.g. notdef:
        #     {firstChar 0, EntryCount 0, idDelta 0, idRangeOffset xx}
        # A byte being mapped through a subheader is treated as an index
        # into a mapping of array index to font glyphIndex.
        # A subheader specifies a subrange within (0...256) by the
        # firstChar and EntryCount values. If the byte value is outside
        # the subrange, then the glyphIndex is zero (e.g. glyph not in
        # font). If the byte index is in the subrange, then an offset
        # index is calculated as (byteIndex - firstChar).
        # The index to glyphIndex mapping is a subrange of the
        # glyphIndexArray. You find the start of the subrange by counting
        # idRangeOffset bytes from the idRangeOffset word. The first value
        # in this subrange is the glyphIndex for the index firstChar. The
        # offset index should then be used in this array to get the
        # glyphIndex.
        # Example for Logocut-Medium:
        #   first byte of charcode = 129; selects subheader 1.
        #   subheader 1 = {firstChar 64, EntryCount 108, idDelta 42,
        #                  idRangeOffset 0252}
        #   second byte of charCode = 66
        #   the index offset = 66-64 = 2.
        #   The subrange of the glyphIndexArray starting at 0x0252 bytes
        #   from the idRangeOffset word is:
        #     [glyphIndexArray index], [subrange array index] = glyphIndex
        #     [256], [0]=1  from charcode [129, 64]
        #     [257], [1]=2  from charcode [129, 65]
        #     [258], [2]=3  from charcode [129, 66]
        #     [259], [3]=4  from charcode [129, 67]
        # So, the glyphIndex = 3 from the array. Then if idDelta is not
        # zero, add it to the glyphIndex to get the final glyphIndex
        # value. In this case the final glyph index = 3 + 42 -> 45.
        # Has anyone ever really tried to overlap the subHeader subranges
        # in the glyphIndexArray? I doubt it!

        self.data = ""
        self.cmap = cmap = {}
        for firstByte in range(256):
            subHeadindex = subHeaderKeys[firstByte]
            subHeader = subHeaderList[subHeadindex]
            if subHeadindex == 0:
                # One-byte charcode. Always record it under its own code,
                # also when it falls outside the subheader's range.
                offsetIndex = firstByte - subHeader.firstCode
                if 0 <= offsetIndex < subHeader.entryCount:
                    gi = subHeader.glyphIndexArray[offsetIndex]
                    if gi != 0:
                        # idDelta arithmetic is modulo 65536
                        gi = (gi + subHeader.idDelta) % 0x10000
                else:
                    gi = 0
                cmap[firstByte] = ttFont.getGlyphName(gi)
            elif subHeader.entryCount:
                base = firstByte * 256 + subHeader.firstCode
                for offsetIndex in range(subHeader.entryCount):
                    gi = subHeader.glyphIndexArray[offsetIndex]
                    if gi != 0:
                        gi = (gi + subHeader.idDelta) % 0x10000
                    cmap[base + offsetIndex] = ttFont.getGlyphName(gi)
            else:
                # Is a subHead that maps to .notdef. We do need to record
                # it, so we can later know that this firstByte value is
                # the initial byte of a two byte charcode, as opposed to
                # a single byte charcode.
                cmap[firstByte * 256] = ttFont.getGlyphName(0)

    def compile(self, ttFont):
        kEmptyTwoCharCodeRange = -1
        items = sorted(self.cmap.items())

        # All one-byte code values map through the subHeaderKeys table to
        # subheader 0. Assume that all entries in the subHeaderKeys table
        # are one-byte codes unless proven otherwise.
        subHeaderKeys = [0] * 256
        subHeaderList = []

        subHeader = None
        lastFirstByte = -1
        for charCode, glyphName in items:
            firstbyte = charCode >> 8
            secondByte = charCode & 0x00FF
            gi = ttFont.getGlyphID(glyphName)
            if firstbyte != lastFirstByte:
                if lastFirstByte > -1:
                    # fix GI's and idDelta of the previous subheader.
                    _setSubHeaderIdDelta(subHeader)
                    assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
                # init new subheader
                subHeader = SubHeader()
                subHeader.firstCode = secondByte
                if (secondByte == 0) and (gi == 0) and (lastFirstByte > -1):
                    # happens only when the font has no glyphs in this
                    # charcode range.
                    subHeader.entryCount = 0
                    subHeaderKeys[firstbyte] = kEmptyTwoCharCodeRange
                else:
                    subHeader.entryCount = 1
                    subHeader.glyphIndexArray.append(gi)
                    subHeaderList.append(subHeader)
                    subHeaderKeys[firstbyte] = len(subHeaderList) - 1
                lastFirstByte = firstbyte
            else:
                assert (subHeader.entryCount != 0), "Error: we should never see another entry for an empty 2 byte charcode range."
                # pad skipped second-byte values with glyph index 0
                codeDiff = secondByte - (
                    subHeader.firstCode + subHeader.entryCount)
                subHeader.glyphIndexArray.extend([0] * codeDiff)
                subHeader.glyphIndexArray.append(gi)
                subHeader.entryCount = subHeader.entryCount + codeDiff + 1
        # fix GI's and idDelta of the last subheader.
        if subHeader is not None:
            _setSubHeaderIdDelta(subHeader)

        # Now we add a last subheader for the subHeaderKeys which mapped
        # to empty two byte charcode ranges.
        subHeader = SubHeader()
        subHeader.firstCode = 0
        subHeader.entryCount = 0
        subHeader.idDelta = 0
        subHeader.idRangeOffset = 2
        subHeaderList.append(subHeader)
        emptySubheadIndex = len(subHeaderList) - 1
        for index in range(256):
            if subHeaderKeys[index] < 0:
                subHeaderKeys[index] = emptySubheadIndex
        # Since this is the last subheader, the GlyphIndex Array starts two
        # bytes after the start of the idRangeOffset word of this
        # subHeader. We can safely point to the first entry in the
        # GlyphIndexArray, since the first subrange of the GlyphIndexArray
        # is for subHeader 0, which always starts with charcode 0 and
        # GID 0.

        # I am not going to try and optimise by trying to overlap the
        # glyphIDArray subranges of the subheaders - I will just write
        # them out sequentially.
        # offset to beginning of glyphIDArray from first subheader
        # idRangeOffset.
        idRangeOffset = (len(subHeaderList) - 1) * 8 + 2
        for subHeader in subHeaderList[:-1]:  # skip the empty-set subheader
            subHeader.idRangeOffset = idRangeOffset
            # one less subheader, one more subRange.
            idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount * 2

        # Now we can write out the data!
        # header, 256 subHeaderKeys, and subheader array.
        length = 6 + 512 + 8 * len(subHeaderList)
        for subhead in subHeaderList[:-1]:
            length = length + subhead.entryCount * 2
        chunks = [struct.pack(">HHH", 2, length, self.version)]
        for index in subHeaderKeys:
            chunks.append(struct.pack(">H", index * 8))
        for subhead in subHeaderList:
            idDelta = subhead.idDelta
            if idDelta > 0x7FFF:
                # idDelta is a signed field used modulo 65536
                idDelta = idDelta - 0x10000
            chunks.append(struct.pack(
                subHeaderFormat, subhead.firstCode, subhead.entryCount,
                idDelta, subhead.idRangeOffset))
        for subhead in subHeaderList[:-1]:
            for gi in subhead.glyphIndexArray:
                chunks.append(struct.pack(">H", gi))
        data = "".join(chunks)

        assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
        return data

    def toXML(self, writer, ttFont):
        self._mapToXML(writer, [("version", self.version)])

    def fromXML(self, element, ttFont):
        name, attrs, content = element
        self.version = safeEval(attrs["version"])
        self._mapFromXML(content)


cmap_format_4_format = ">7H"

#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array


def splitRange(startCode, endCode, cmap):
    """Split startCode..endCode into cheaper cmap4 segments.

    *cmap* maps every code in the range to a glyph ID. Returns a
    (startCodes, endCodes) pair of lists in which the first start code
    (the caller already has it) is left out, so
    len(startCodes) + 1 == len(endCodes).
    """
    # Try to split a range of character codes into subranges with
    # consecutive glyph IDs in such a way that the cmap4 subtable can be
    # stored "most" efficiently. I can't prove I've got the optimal
    # solution, but it seems to do well with the fonts I tested: none
    # became bigger, many became smaller.
    if startCode == endCode:
        return [], [endCode]

    lastID = cmap[startCode]
    lastCode = startCode
    inOrder = None
    orderedBegin = None
    subRanges = []

    # Gather subranges in which the glyph IDs are consecutive.
    for code in range(startCode + 1, endCode + 1):
        glyphID = cmap[code]

        if glyphID - 1 == lastID:
            if not inOrder:
                inOrder = 1
                orderedBegin = lastCode
        elif inOrder:
            inOrder = 0
            subRanges.append((orderedBegin, lastCode))
            orderedBegin = None

        lastID = glyphID
        lastCode = code

    if inOrder:
        subRanges.append((orderedBegin, lastCode))
    assert lastCode == endCode

    # Now filter out those new subranges that would only make the data
    # bigger. A new segment costs 8 bytes, not using a new segment costs
    # 2 bytes per character.
    newRanges = []
    for b, e in subRanges:
        if b == startCode and e == endCode:
            break  # the whole range, we're fine
        if b == startCode or e == endCode:
            threshold = 4  # split costs one more segment
        else:
            threshold = 8  # split costs two more segments
        if (e - b + 1) > threshold:
            newRanges.append((b, e))
    subRanges = newRanges

    if not subRanges:
        return [], [endCode]

    if subRanges[0][0] != startCode:
        subRanges.insert(0, (startCode, subRanges[0][0] - 1))
    if subRanges[-1][1] != endCode:
        subRanges.append((subRanges[-1][1] + 1, endCode))

    # Fill the "holes" in the segments list -- those are the segments in
    # which the glyph IDs are _not_ consecutive.
    i = 1
    while i < len(subRanges):
        if subRanges[i - 1][1] + 1 != subRanges[i][0]:
            subRanges.insert(
                i, (subRanges[i - 1][1] + 1, subRanges[i][0] - 1))
            i = i + 1
        i = i + 1

    # Transform the ranges into startCode/endCode lists.
    start = [b for b, e in subRanges]
    end = [e for b, e in subRanges]
    start.pop(0)

    assert len(start) + 1 == len(end)
    return start, end


class cmap_format_4(CmapSubtable):
    """Segment mapping to delta values (16-bit character codes)."""

    def decompile(self, data, ttFont):
        (format, length, self.version, segCountX2,
            searchRange, entrySelector, rangeShift) = \
                struct.unpack(cmap_format_4_format, data[:14])
        assert len(data) == length, "corrupt cmap table (%d, %d)" % (len(data), length)
        segCount = segCountX2 // 2

        allCodes = array.array("H")
        allCodes.fromstring(data[14:])
        if ttLib.endian != "big":
            allCodes.byteswap()

        # divide the data
        endCode = allCodes[:segCount]
        # the +1 is skipping the reservedPad field
        allCodes = allCodes[segCount + 1:]
        startCode = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idDelta = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idRangeOffset = allCodes[:segCount]
        glyphIndexArray = allCodes[segCount:]

        # build 2-byte character mapping
        cmap = {}
        for i in range(len(startCode) - 1):  # don't do 0xffff!
            rangeOffset = idRangeOffset[i]
            for charCode in range(startCode[i], endCode[i] + 1):
                if rangeOffset == 0:
                    glyphID = charCode + idDelta[i]
                else:
                    # idRangeOffset is in bytes, counted from its own
                    # slot in the idRangeOffset array; turn that into
                    # an index into glyphIndexArray.
                    index = (rangeOffset // 2 + (charCode - startCode[i])
                             + i - len(idRangeOffset))
                    if glyphIndexArray[index] != 0:  # if not missing glyph
                        glyphID = glyphIndexArray[index] + idDelta[i]
                    else:
                        glyphID = 0  # missing glyph
                cmap[charCode] = ttFont.getGlyphName(glyphID % 0x10000)
        self.cmap = cmap

    def compile(self, ttFont):
        from fontTools.ttLib.sfnt import maxPowerOfTwo

        cmap = {}  # code:glyphID mapping
        for code, glyphName in self.cmap.items():
            cmap[code] = ttFont.getGlyphID(glyphName)
        codes = sorted(cmap.keys())

        # Build startCode and endCode lists.
        # Split the char codes in ranges of consecutive char codes, then
        # split each range in more ranges of consecutive/not consecutive
        # glyph IDs. See splitRange().
        startCode = []
        endCode = []
        if codes:
            lastCode = codes[0]
            startCode.append(lastCode)
            # skip the first code, it's the first start code
            for charCode in codes[1:]:
                if charCode == lastCode + 1:
                    lastCode = charCode
                    continue
                start, end = splitRange(startCode[-1], lastCode, cmap)
                startCode.extend(start)
                endCode.extend(end)
                startCode.append(charCode)
                lastCode = charCode
            # the final run deserves the same treatment as the others
            start, end = splitRange(startCode[-1], lastCode, cmap)
            startCode.extend(start)
            endCode.extend(end)
        startCode.append(0xffff)
        endCode.append(0xffff)

        # build up rest of cruft
        idDelta = []
        idRangeOffset = []
        glyphIndexArray = []

        for i in range(len(endCode) - 1):  # skip the closing codes (0xffff)
            indices = [cmap[charCode]
                       for charCode in range(startCode[i], endCode[i] + 1)]
            if indices == list(range(indices[0], indices[0] + len(indices))):
                # consecutive glyph IDs: a delta is all we need
                idDelta.append((indices[0] - startCode[i]) % 0x10000)
                idRangeOffset.append(0)
            else:
                # byte distance from this idRangeOffset slot to the
                # segment's first entry in glyphIndexArray
                idDelta.append(0)
                idRangeOffset.append(
                    2 * (len(endCode) + len(glyphIndexArray) - i))
                glyphIndexArray.extend(indices)
        # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
        idDelta.append(1)
        idRangeOffset.append(0)

        # binary search parameters required by the format
        segCount = len(endCode)
        segCountX2 = segCount * 2
        maxExponent = maxPowerOfTwo(segCount)
        searchRange = 2 * (2 ** maxExponent)
        entrySelector = maxExponent
        rangeShift = 2 * segCount - searchRange

        allCodes = array.array(
            "H",
            endCode + [0] + startCode + idDelta + idRangeOffset
            + glyphIndexArray)
        if ttLib.endian != "big":
            allCodes.byteswap()
        data = allCodes.tostring()
        length = struct.calcsize(cmap_format_4_format) + len(data)
        header = struct.pack(
            cmap_format_4_format, self.format, length, self.version,
            segCountX2, searchRange, entrySelector, rangeShift)
        return header + data

    def toXML(self, writer, ttFont):
        from fontTools.unicode import Unicode
        writer.begintag(self.__class__.__name__, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("version", self.version),
        ])
        writer.newline()

        for code, name in sorted(self.cmap.items()):
            writer.simpletag("map", code=hex(code), name=name)
            writer.comment(Unicode[code])
            writer.newline()

        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, element, ttFont):
        name, attrs, content = element
        self.version = safeEval(attrs["version"])
        self._mapFromXML(content)


class cmap_format_6(CmapSubtable):
    """Trimmed table mapping: one contiguous range of character codes."""

    def decompile(self, data, ttFont):
        format, length, version, firstCode, entryCount = struct.unpack(
            ">HHHHH", data[:10])
        self.version = int(version)
        firstCode = int(firstCode)
        data = data[10:]
        #assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
        glyphIndexArray = array.array("H")
        glyphIndexArray.fromstring(data[:2 * int(entryCount)])
        if ttLib.endian != "big":
            glyphIndexArray.byteswap()
        self.cmap = cmap = {}
        for i in range(len(glyphIndexArray)):
            cmap[i + firstCode] = ttFont.getGlyphName(glyphIndexArray[i])

    def compile(self, ttFont):
        codes = sorted(self.cmap.keys())
        if codes:
            firstCode = codes[0]
            # the codes must form one range without holes
            assert codes == list(range(firstCode, firstCode + len(codes)))
        else:
            firstCode = 0  # empty mapping: write a table without entries
        glyphIDs = [ttFont.getGlyphID(self.cmap[code]) for code in codes]
        glyphIndexArray = array.array("H", glyphIDs)
        if ttLib.endian != "big":
            glyphIndexArray.byteswap()
        data = glyphIndexArray.tostring()
        header = struct.pack(
            ">HHHHH",
            6, len(data) + 10, self.version, firstCode, len(codes))
        return header + data

    def toXML(self, writer, ttFont):
        self._mapToXML(writer, [("version", self.version)])

    def fromXML(self, element, ttFont):
        name, attrs, content = element
        self.version = safeEval(attrs["version"])
        self._mapFromXML(content)


class cmap_format_12(CmapSubtable):
    """Segmented coverage: groups of consecutive 32-bit character codes
    mapped to consecutive glyph IDs."""

    def decompile(self, data, ttFont):
        format, reserved, length, language, nGroups = struct.unpack(
            ">HHLLL", data[:16])
        data = data[16:]
        assert len(data) == nGroups * 12 == (length - 16)
        self.cmap = cmap = {}
        for i in range(nGroups):
            startCharCode, endCharCode, glyphID = struct.unpack(
                ">LLL", data[i * 12:(i + 1) * 12])
            while startCharCode <= endCharCode:
                cmap[startCharCode] = ttFont.getGlyphName(glyphID)
                glyphID = glyphID + 1
                startCharCode = startCharCode + 1
        self.format = format
        self.reserved = reserved
        self.length = length
        self.language = language
        self.nGroups = nGroups

    def compile(self, ttFont):
        cmap = {}  # code:glyphID mapping
        for code, glyphName in self.cmap.items():
            cmap[code] = ttFont.getGlyphID(glyphName)
        charCodes = sorted(cmap.keys())

        groups = []
        if charCodes:
            startCharCode = lastCharCode = charCodes[0]
            startGlyphID = lastGlyphID = cmap[startCharCode]
            for charCode in charCodes[1:]:
                glyphID = cmap[charCode]
                # A group ends where either the character codes or the
                # glyph IDs stop being consecutive.
                if (charCode != lastCharCode + 1
                        or glyphID != lastGlyphID + 1):
                    groups.append(struct.pack(
                        ">LLL", startCharCode, lastCharCode, startGlyphID))
                    startCharCode = charCode
                    startGlyphID = glyphID
                lastCharCode = charCode
                lastGlyphID = glyphID
            # don't forget the group that is still open
            groups.append(struct.pack(
                ">LLL", startCharCode, lastCharCode, startGlyphID))

        data = "".join(groups)
        # the length field covers the 16 byte header as well
        header = struct.pack(
            ">HHLLL", self.format, 0, 16 + len(data), self.language,
            len(groups))
        return header + data

    def toXML(self, writer, ttFont):
        self._mapToXML(writer, [
            ("format", self.format),
            ("reserved", self.reserved),
            ("length", self.length),
            ("language", self.language),
            ("nGroups", self.nGroups),
        ])

    def fromXML(self, element, ttFont):
        name, attrs, content = element
        self.format = safeEval(attrs["format"])
        self.reserved = safeEval(attrs["reserved"])
        self.length = safeEval(attrs["length"])
        self.language = safeEval(attrs["language"])
        self.nGroups = safeEval(attrs["nGroups"])
        self._mapFromXML(content)


class cmap_format_unknown(CmapSubtable):
    """Fallback for unsupported formats: keeps the raw bytes untouched."""

    def decompile(self, data, ttFont):
        self.data = data

    def compile(self, ttFont):
        return self.data


cmap_classes = {
    0: cmap_format_0,
    2: cmap_format_2,
    4: cmap_format_4,
    6: cmap_format_6,
    12: cmap_format_12,
}