# _c_m_a_p.py revision 542b9510e6a8909e35e99a5279b7c2ec57c78e3c
1cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)import DefaultTable 2cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)import struct 3cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)import string 4cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)import array 5cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)from fontTools import ttLib 6cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)from fontTools.misc.textTools import safeEval, readHex 7cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)from types import TupleType 8cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 9cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 10cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)class table__c_m_a_p(DefaultTable.DefaultTable): 11cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 12cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) def getcmap(self, platformID, platEncID): 13cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) for subtable in self.tables: 14cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if (subtable.platformID == platformID and 15cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) subtable.platEncID == platEncID): 16cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) return subtable 17cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) return None # not found 18cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 19cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) def decompile(self, data, ttFont): 20cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) tableVersion, numSubTables = struct.unpack(">HH", data[:4]) 21cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) self.tableVersion = int(tableVersion) 22cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) self.tables = tables = [] 
23cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) for i in range(numSubTables): 24cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) platformID, platEncID, offset = struct.unpack( 25cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) ">HHl", data[4+i*8:4+(i+1)*8]) 26cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) platformID, platEncID = int(platformID), int(platEncID) 27cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) format, length = struct.unpack(">HH", data[offset:offset+4]) 28cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if not length: 29cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) continue # bogus cmap subtable? 30cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if not cmap_classes.has_key(format): 31cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table = cmap_format_unknown(format) 32cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) else: 33cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table = cmap_classes[format](format) 34cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table.platformID = platformID 35cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table.platEncID = platEncID 36cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table.decompile(data[offset:offset+int(length)], ttFont) 37cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) tables.append(table) 38cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 39cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) def compile(self, ttFont): 40cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) self.tables.sort() # sort according to the spec; see CmapSubtable.__cmp__() 41cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) numSubTables = len(self.tables) 42cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) totalOffset = 4 + 8 * numSubTables 
43cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) data = struct.pack(">HH", self.tableVersion, numSubTables) 44cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) tableData = "" 45cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) done = {} # remember the data so we can reuse the "pointers" 46cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) for table in self.tables: 47cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) chunk = table.compile(ttFont) 48cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if done.has_key(chunk): 49cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) offset = done[chunk] 50cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) else: 51cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) offset = done[chunk] = totalOffset + len(tableData) 52cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) tableData = tableData + chunk 53cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset) 54cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) return data + tableData 55cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 56cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) def toXML(self, writer, ttFont): 57cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) writer.simpletag("tableVersion", version=self.tableVersion) 58cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) writer.newline() 59cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) for table in self.tables: 60cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table.toXML(writer, ttFont) 61cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 62cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) def fromXML(self, (name, attrs, content), ttFont): 63cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if name == 
"tableVersion": 64cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) self.tableVersion = safeEval(attrs["version"]) 65cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) return 66cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if name[:12] <> "cmap_format_": 67cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) return 68cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if not hasattr(self, "tables"): 69cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) self.tables = [] 70cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) format = safeEval(name[12]) 71cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if not cmap_classes.has_key(format): 72cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table = cmap_format_unknown(format) 73cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) else: 74cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table = cmap_classes[format](format) 75cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table.platformID = safeEval(attrs["platformID"]) 76cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table.platEncID = safeEval(attrs["platEncID"]) 77cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) table.fromXML((name, attrs, content), ttFont) 78cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) self.tables.append(table) 79cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 80cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 81cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)class CmapSubtable: 82cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 83cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) def __init__(self, format): 84cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) self.format = format 85cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 86 def toXML(self, writer, ttFont): 87 
writer.begintag(self.__class__.__name__, [ 88 ("platformID", self.platformID), 89 ("platEncID", self.platEncID), 90 ]) 91 writer.newline() 92 writer.dumphex(self.compile(ttFont)) 93 writer.endtag(self.__class__.__name__) 94 writer.newline() 95 96 def fromXML(self, (name, attrs, content), ttFont): 97 self.decompile(readHex(content), ttFont) 98 99 def __cmp__(self, other): 100 # implemented so that list.sort() sorts according to the cmap spec. 101 selfTuple = ( 102 self.platformID, 103 self.platEncID, 104 self.version, 105 self.__dict__) 106 otherTuple = ( 107 other.platformID, 108 other.platEncID, 109 other.version, 110 other.__dict__) 111 return cmp(selfTuple, otherTuple) 112 113 114class cmap_format_0(CmapSubtable): 115 116 def decompile(self, data, ttFont): 117 format, length, version = struct.unpack(">HHH", data[:6]) 118 self.version = int(version) 119 assert len(data) == 262 == length 120 glyphIdArray = array.array("B") 121 glyphIdArray.fromstring(data[6:]) 122 self.cmap = cmap = {} 123 for charCode in range(len(glyphIdArray)): 124 cmap[charCode] = ttFont.getGlyphName(glyphIdArray[charCode]) 125 126 def compile(self, ttFont): 127 charCodes = self.cmap.keys() 128 charCodes.sort() 129 assert charCodes == range(256) # charCodes[charCode] == charCode 130 for charCode in charCodes: 131 # reusing the charCodes list! 
132 charCodes[charCode] = ttFont.getGlyphID(self.cmap[charCode]) 133 glyphIdArray = array.array("B", charCodes) 134 data = struct.pack(">HHH", 0, 262, self.version) + glyphIdArray.tostring() 135 assert len(data) == 262 136 return data 137 138 def toXML(self, writer, ttFont): 139 writer.begintag(self.__class__.__name__, [ 140 ("platformID", self.platformID), 141 ("platEncID", self.platEncID), 142 ("version", self.version), 143 ]) 144 writer.newline() 145 items = self.cmap.items() 146 items.sort() 147 for code, name in items: 148 writer.simpletag("map", code=hex(code), name=name) 149 writer.newline() 150 writer.endtag(self.__class__.__name__) 151 writer.newline() 152 153 def fromXML(self, (name, attrs, content), ttFont): 154 self.version = safeEval(attrs["version"]) 155 self.cmap = {} 156 for element in content: 157 if type(element) <> TupleType: 158 continue 159 name, attrs, content = element 160 if name <> "map": 161 continue 162 self.cmap[safeEval(attrs["code"])] = attrs["name"] 163 164 165class cmap_format_2(CmapSubtable): 166 167 def decompile(self, data, ttFont): 168 format, length, version = struct.unpack(">HHH", data[:6]) 169 self.version = int(version) 170 self.data = data 171 172 def compile(self, ttFont): 173 return self.data 174 175 176cmap_format_4_format = ">7H" 177 178#uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF. 
179#uint16 reservedPad # This value should be zero 180#uint16 startCode[segCount] # Starting character code for each segment 181#uint16 idDelta[segCount] # Delta for all character codes in segment 182#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0 183#uint16 glyphIndexArray[variable] # Glyph index array 184 185def splitRange(startCode, endCode, cmap): 186 if startCode == endCode: 187 return [], [endCode] 188 189 allGlyphs = [(startCode, cmap[startCode])] # XXX 190 lastID = cmap[startCode] 191 lastCode = startCode 192 inOrder = None 193 orderedBegin = None 194 parts = [] 195 196 for code in range(startCode + 1, endCode + 1): 197 glyphID = cmap[code] 198 allGlyphs.append((code, glyphID)) # XXX 199 200 if glyphID - 1 == lastID: 201 if inOrder is None or not inOrder: 202 inOrder = 1 203 orderedBegin = lastCode 204 else: 205 if inOrder: 206 inOrder = 0 207 parts.append((orderedBegin, lastCode)) 208 orderedBegin = None 209 210 lastID = glyphID 211 lastCode = code 212 213 if inOrder: 214 parts.append((orderedBegin, lastCode)) 215 assert lastCode == endCode 216 217 newParts = [] 218 for b, e in parts: 219 if b == startCode and e == endCode: 220 break # the whole range, we're fine 221 if b == startCode or e == endCode: 222 threshold = 4 # split costs one more segment 223 else: 224 threshold = 8 # split costs two more segments 225 if (e - b + 1) > threshold: 226 newParts.append((b, e)) 227 parts = newParts 228 229 if not parts: 230 return [], [endCode] 231 232 if parts[0][0] != startCode: 233 parts.insert(0, (startCode, parts[0][0] - 1)) 234 if parts[-1][1] != endCode: 235 parts.append((parts[-1][1] + 1, endCode)) 236 i = 1 237 while i < len(parts): 238 if parts[i-1][1] + 1 != parts[i][0]: 239 parts.insert(i, (parts[i-1][1] + 1, parts[i][0] - 1)) 240 i = i + 1 241 i = i + 1 242 243 start = [] 244 end = [] 245 for b, e in parts: 246 start.append(b) 247 end.append(e) 248 start.pop(0) 249 250 assert len(start) + 1 == len(end) 251 return start, end 
252 253 254class cmap_format_4(CmapSubtable): 255 256 def decompile(self, data, ttFont): 257 (format, length, self.version, segCountX2, 258 searchRange, entrySelector, rangeShift) = \ 259 struct.unpack(cmap_format_4_format, data[:14]) 260 assert len(data) == length, "corrupt cmap table (%d, %d)" % (len(data), length) 261 segCount = segCountX2 / 2 262 263 allCodes = array.array("H") 264 allCodes.fromstring(data[14:]) 265 if ttLib.endian <> "big": 266 allCodes.byteswap() 267 268 # divide the data 269 endCode = allCodes[:segCount] 270 allCodes = allCodes[segCount+1:] # the +1 is skipping the reservedPad field 271 startCode = allCodes[:segCount] 272 allCodes = allCodes[segCount:] 273 idDelta = allCodes[:segCount] 274 allCodes = allCodes[segCount:] 275 idRangeOffset = allCodes[:segCount] 276 glyphIndexArray = allCodes[segCount:] 277 278 #print ">>>> segCount", segCount, "len(glyphIndexArray):", len(glyphIndexArray), "len(data)", len(data) 279 280 # build 2-byte character mapping 281 cmap = {} 282 for i in range(len(startCode) - 1): # don't do 0xffff! 283 for charCode in range(startCode[i], endCode[i] + 1): 284 rangeOffset = idRangeOffset[i] 285 if rangeOffset == 0: 286 glyphID = charCode + idDelta[i] 287 else: 288 # *someone* needs to get killed. 
289 index = idRangeOffset[i] / 2 + (charCode - startCode[i]) + i - len(idRangeOffset) 290 if glyphIndexArray[index] <> 0: # if not missing glyph 291 glyphID = glyphIndexArray[index] + idDelta[i] 292 else: 293 glyphID = 0 # missing glyph 294 cmap[charCode] = ttFont.getGlyphName(glyphID % 0x10000) 295 self.cmap = cmap 296 297 def compile(self, ttFont): 298 from fontTools.ttLib.sfnt import maxPowerOfTwo 299 300 cmap = {} # code:glyphID mapping 301 for code, glyphName in self.cmap.items(): 302 cmap[code] = ttFont.getGlyphID(glyphName) 303 codes = cmap.keys() 304 codes.sort() 305 306 # build startCode and endCode lists 307 lastCode = codes[0] 308 endCode = [] 309 startCode = [lastCode] 310 for charCode in codes[1:]: # skip the first code, it's the first start code 311 if charCode == lastCode + 1: 312 lastCode = charCode 313 continue 314 start, end = splitRange(startCode[-1], lastCode, cmap) 315 startCode.extend(start) 316 endCode.extend(end) 317 startCode.append(charCode) 318 lastCode = charCode 319 endCode.append(lastCode) 320 startCode.append(0xffff) 321 endCode.append(0xffff) 322 323 # build up rest of cruft 324 idDelta = [] 325 idRangeOffset = [] 326 glyphIndexArray = [] 327 328 for i in range(len(endCode)-1): # skip the closing codes (0xffff) 329 indices = [] 330 for charCode in range(startCode[i], endCode[i] + 1): 331 indices.append(cmap[charCode]) 332 if indices == range(indices[0], indices[0] + len(indices)): 333 idDelta.append((indices[0] - startCode[i]) % 0x10000) 334 idRangeOffset.append(0) 335 else: 336 # someone *definitely* needs to get killed. 337 idDelta.append(0) 338 idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i)) 339 glyphIndexArray.extend(indices) 340 idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef 341 idRangeOffset.append(0) 342 343 # Insane. 
344 segCount = len(endCode) 345 segCountX2 = segCount * 2 346 maxExponent = maxPowerOfTwo(segCount) 347 searchRange = 2 * (2 ** maxExponent) 348 entrySelector = maxExponent 349 rangeShift = 2 * segCount - searchRange 350 351 allCodes = array.array("H", 352 endCode + [0] + startCode + idDelta + idRangeOffset + glyphIndexArray) 353 if ttLib.endian <> "big": 354 allCodes.byteswap() 355 data = allCodes.tostring() 356 length = struct.calcsize(cmap_format_4_format) + len(data) 357 header = struct.pack(cmap_format_4_format, self.format, length, self.version, 358 segCountX2, searchRange, entrySelector, rangeShift) 359 data = header + data 360 361 #print "<<<< segCount", segCount, "len(glyphIndexArray):", len(glyphIndexArray), "len(data)", len(data) 362 363 return data 364 365 def toXML(self, writer, ttFont): 366 from fontTools.unicode import Unicode 367 codes = self.cmap.items() 368 codes.sort() 369 writer.begintag(self.__class__.__name__, [ 370 ("platformID", self.platformID), 371 ("platEncID", self.platEncID), 372 ("version", self.version), 373 ]) 374 writer.newline() 375 376 for code, name in codes: 377 writer.simpletag("map", code=hex(code), name=name) 378 writer.comment(Unicode[code]) 379 writer.newline() 380 381 writer.endtag(self.__class__.__name__) 382 writer.newline() 383 384 def fromXML(self, (name, attrs, content), ttFont): 385 self.version = safeEval(attrs["version"]) 386 self.cmap = {} 387 for element in content: 388 if type(element) <> TupleType: 389 continue 390 name, attrs, content = element 391 if name <> "map": 392 continue 393 self.cmap[safeEval(attrs["code"])] = attrs["name"] 394 395 396class cmap_format_6(CmapSubtable): 397 398 def decompile(self, data, ttFont): 399 format, length, version, firstCode, entryCount = struct.unpack( 400 ">HHHHH", data[:10]) 401 self.version = int(version) 402 firstCode = int(firstCode) 403 self.version = int(version) 404 data = data[10:] 405 #assert len(data) == 2 * entryCount # XXX not true in Apple's Helvetica!!! 
406 glyphIndexArray = array.array("H") 407 glyphIndexArray.fromstring(data[:2 * int(entryCount)]) 408 if ttLib.endian <> "big": 409 glyphIndexArray.byteswap() 410 self.cmap = cmap = {} 411 for i in range(len(glyphIndexArray)): 412 glyphID = glyphIndexArray[i] 413 glyphName = ttFont.getGlyphName(glyphID) 414 cmap[i+firstCode] = glyphName 415 416 def compile(self, ttFont): 417 codes = self.cmap.keys() 418 codes.sort() 419 assert codes == range(codes[0], codes[0] + len(codes)) 420 glyphIndexArray = array.array("H", [0] * len(codes)) 421 firstCode = codes[0] 422 for i in range(len(codes)): 423 code = codes[i] 424 glyphIndexArray[code-firstCode] = ttFont.getGlyphID(self.cmap[code]) 425 if ttLib.endian <> "big": 426 glyphIndexArray.byteswap() 427 data = glyphIndexArray.tostring() 428 header = struct.pack(">HHHHH", 429 6, len(data) + 10, self.version, firstCode, len(self.cmap)) 430 return header + data 431 432 def toXML(self, writer, ttFont): 433 codes = self.cmap.items() 434 codes.sort() 435 writer.begintag(self.__class__.__name__, [ 436 ("platformID", self.platformID), 437 ("platEncID", self.platEncID), 438 ("version", self.version), 439 ]) 440 writer.newline() 441 442 for code, name in codes: 443 writer.simpletag("map", code=hex(code), name=name) 444 writer.newline() 445 446 writer.endtag(self.__class__.__name__) 447 writer.newline() 448 449 def fromXML(self, (name, attrs, content), ttFont): 450 self.version = safeEval(attrs["version"]) 451 self.cmap = {} 452 for element in content: 453 if type(element) <> TupleType: 454 continue 455 name, attrs, content = element 456 if name <> "map": 457 continue 458 self.cmap[safeEval(attrs["code"])] = attrs["name"] 459 460 461class cmap_format_unknown(CmapSubtable): 462 463 def decompile(self, data, ttFont): 464 self.data = data 465 466 def compile(self, ttFont): 467 return self.data 468 469 470cmap_classes = { 471 0: cmap_format_0, 472 2: cmap_format_2, 473 4: cmap_format_4, 474 6: cmap_format_6, 475 } 476 477 478