_c_m_a_p.py revision bafa66e665afa581b58391585f1792578a4d3d2d
1import DefaultTable
2import struct
3import array
4from fontTools import ttLib
5from fontTools.misc.textTools import safeEval, readHex
6from types import TupleType
7
8
9class table__c_m_a_p(DefaultTable.DefaultTable):
10
11	def getcmap(self, platformID, platEncID):
12		for subtable in self.tables:
13			if (subtable.platformID == platformID and
14					subtable.platEncID == platEncID):
15				return subtable
16		return None # not found
17
18	def decompile(self, data, ttFont):
19		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
20		self.tableVersion = int(tableVersion)
21		self.tables = tables = []
22		for i in range(numSubTables):
23			platformID, platEncID, offset = struct.unpack(
24					">HHl", data[4+i*8:4+(i+1)*8])
25			platformID, platEncID = int(platformID), int(platEncID)
26			format, length = struct.unpack(">HH", data[offset:offset+4])
27			if (format < 8) and not length:
28				continue  # bogus cmap subtable?
29			if format in [8,10,12]:
30				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
31			if not cmap_classes.has_key(format):
32				table = cmap_format_unknown(format)
33			else:
34				table = cmap_classes[format](format)
35			table.platformID = platformID
36			table.platEncID = platEncID
37			table.decompile(data[offset:offset+int(length)], ttFont)
38			tables.append(table)
39
40	def compile(self, ttFont):
41		self.tables.sort()    # sort according to the spec; see CmapSubtable.__cmp__()
42		numSubTables = len(self.tables)
43		totalOffset = 4 + 8 * numSubTables
44		data = struct.pack(">HH", self.tableVersion, numSubTables)
45		tableData = ""
46		done = {}  # remember the data so we can reuse the "pointers"
47		for table in self.tables:
48			chunk = table.compile(ttFont)
49			if done.has_key(chunk):
50				offset = done[chunk]
51			else:
52				offset = done[chunk] = totalOffset + len(tableData)
53				tableData = tableData + chunk
54			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
55		return data + tableData
56
57	def toXML(self, writer, ttFont):
58		writer.simpletag("tableVersion", version=self.tableVersion)
59		writer.newline()
60		for table in self.tables:
61			table.toXML(writer, ttFont)
62
63	def fromXML(self, (name, attrs, content), ttFont):
64		if name == "tableVersion":
65			self.tableVersion = safeEval(attrs["version"])
66			return
67		if name[:12] <> "cmap_format_":
68			return
69		if not hasattr(self, "tables"):
70			self.tables = []
71		format = safeEval(name[12])
72		if not cmap_classes.has_key(format):
73			table = cmap_format_unknown(format)
74		else:
75			table = cmap_classes[format](format)
76		table.platformID = safeEval(attrs["platformID"])
77		table.platEncID = safeEval(attrs["platEncID"])
78		table.fromXML((name, attrs, content), ttFont)
79		self.tables.append(table)
80
81
82class CmapSubtable:
83
84	def __init__(self, format):
85		self.format = format
86
87	def toXML(self, writer, ttFont):
88		writer.begintag(self.__class__.__name__, [
89				("platformID", self.platformID),
90				("platEncID", self.platEncID),
91				])
92		writer.newline()
93		writer.dumphex(self.compile(ttFont))
94		writer.endtag(self.__class__.__name__)
95		writer.newline()
96
97	def fromXML(self, (name, attrs, content), ttFont):
98		self.decompile(readHex(content), ttFont)
99
100	def __cmp__(self, other):
101		# implemented so that list.sort() sorts according to the cmap spec.
102		selfTuple = (
103					self.platformID,
104					self.platEncID,
105					self.version,
106					self.__dict__)
107		otherTuple = (
108					other.platformID,
109					other.platEncID,
110					other.version,
111					other.__dict__)
112		return cmp(selfTuple, otherTuple)
113
114
115class cmap_format_0(CmapSubtable):
116
117	def decompile(self, data, ttFont):
118		format, length, version = struct.unpack(">HHH", data[:6])
119		self.version = int(version)
120		assert len(data) == 262 == length
121		glyphIdArray = array.array("B")
122		glyphIdArray.fromstring(data[6:])
123		self.cmap = cmap = {}
124		for charCode in range(len(glyphIdArray)):
125			cmap[charCode] = ttFont.getGlyphName(glyphIdArray[charCode])
126
127	def compile(self, ttFont):
128		charCodes = self.cmap.keys()
129		charCodes.sort()
130		assert charCodes == range(256)  # charCodes[charCode] == charCode
131		for charCode in charCodes:
132			# reusing the charCodes list!
133			charCodes[charCode] = ttFont.getGlyphID(self.cmap[charCode])
134		glyphIdArray = array.array("B", charCodes)
135		data = struct.pack(">HHH", 0, 262, self.version) + glyphIdArray.tostring()
136		assert len(data) == 262
137		return data
138
139	def toXML(self, writer, ttFont):
140		writer.begintag(self.__class__.__name__, [
141				("platformID", self.platformID),
142				("platEncID", self.platEncID),
143				("version", self.version),
144				])
145		writer.newline()
146		items = self.cmap.items()
147		items.sort()
148		for code, name in items:
149			writer.simpletag("map", code=hex(code), name=name)
150			writer.newline()
151		writer.endtag(self.__class__.__name__)
152		writer.newline()
153
154	def fromXML(self, (name, attrs, content), ttFont):
155		self.version = safeEval(attrs["version"])
156		self.cmap = {}
157		for element in content:
158			if type(element) <> TupleType:
159				continue
160			name, attrs, content = element
161			if name <> "map":
162				continue
163			self.cmap[safeEval(attrs["code"])] = attrs["name"]
164
165
# struct format of one format 2 subheader:
# firstCode, entryCount, idDelta (signed), idRangeOffset
subHeaderFormat = ">HHhH"
class SubHeader:
	"""Plain record for one cmap format 2 subheader plus the glyph index
	subrange it refers to."""
	def __init__(self):
		# the four packed fields stay None until they are filled in
		self.firstCode = self.entryCount = None
		self.idDelta = self.idRangeOffset = None
		# every instance gets its own list
		self.glyphIndexArray = []
174
175class cmap_format_2(CmapSubtable):
176
177	def decompile(self, data, ttFont):
178		format, length, version = struct.unpack(">HHH", data[:6])
179		self.version = int(version)
180		data = data[6:]
181		subHeaderKeys = []
182		maxSubHeaderindex = 0
183
184		# get the key array, and determine the number of subHeaders.
185		for i in range(256):
186			key = struct.unpack(">H", data[:2])[0]
187			value = int(key)/8
188			if value > maxSubHeaderindex:
189				maxSubHeaderindex  = value
190			data = data[2:]
191			subHeaderKeys.append(value)
192
193		#Load subHeaders
194		subHeaderList = []
195		for i in range(maxSubHeaderindex + 1):
196			subHeader = SubHeader()
197			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
198				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[:8])
199			data = data[8:]
200			giData = data[subHeader.idRangeOffset-2:]
201			for j in range(subHeader.entryCount):
202				gi = struct.unpack(">H", giData[:2])[0]
203				giData = giData[2:]
204				subHeader.glyphIndexArray.append(int(gi))
205
206			subHeaderList.append(subHeader)
207
208		# How this gets processed.
209		# Charcodes may be one or two bytes.
210		# The first byte of a charcode is mapped through the  subHeaderKeys, to select
211		# a subHeader. For any subheader but 0, the next byte is then mapped through the
212		# selected subheader. If subheader Index 0 is selected, then the byte itself is
213		# mapped through the subheader, and there is no second byte.
214		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
215		#
216		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
217		# The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray
218		# referenced by another subheader.
219		# The only subheader that will be referenced by more than one first-byte value is the subheader
220		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
221		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
222		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
223		# A subheader specifies a subrange within (0...256) by the
224		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
225		# (e.g. glyph not in font).
226		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
227		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
228		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
229		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
230		# Example for Logocut-Medium
231		# first byte of charcode = 129; selects subheader 1.
232		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
233		# second byte of charCode = 66
234		# the index offset = 66-64 = 2.
235		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
236		# [glyphIndexArray index], [subrange array index] = glyphIndex
237		# [256], [0]=1 	from charcode [129, 64]
238		# [257], [1]=2  	from charcode [129, 65]
239		# [258], [2]=3  	from charcode [129, 66]
240		# [259], [3]=4  	from charcode [129, 67]
241		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero, add it to the glyphInex to get the final glyphIndex
242		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!
243		# Has anyone ever really tried to overlap the subHeader subranges in the glyphIndexArray? I doubt it!
244
245		self.data = ""
246		self.cmap = {}
247		for firstByte in range(256):
248			subHeadindex = subHeaderKeys[firstByte]
249			subHeader = subHeaderList[subHeadindex]
250			if subHeadindex == 0:
251				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
252					gi = 0
253				else:
254					charCode = firstByte
255					offsetIndex = firstByte - subHeader.firstCode
256					gi = subHeader.glyphIndexArray[offsetIndex]
257					if gi != 0:
258						gi = gi + subHeader.idDelta
259				gName = ttFont.getGlyphName(gi)
260				self.cmap[charCode] = gName
261			else:
262				if subHeader.entryCount:
263					for offsetIndex in range(subHeader.entryCount):
264						charCode = firstByte * 256 + offsetIndex + subHeader.firstCode
265						gi = subHeader.glyphIndexArray[offsetIndex]
266						if gi != 0:
267							gi = gi + subHeader.idDelta
268						gName = ttFont.getGlyphName(gi)
269						self.cmap[charCode] = gName
270				else:
271					# Is a subHead that maps to .notdef. We do need to record it, so we can later
272					# know that this firstByte value is the initial byte of a two byte charcode,
273					# as opposed to a sing byte charcode.
274					charCode = firstByte * 256
275					gName = ttFont.getGlyphName(0)
276					self.cmap[charCode] = gName
277
278
279	def compile(self, ttFont):
280		kEmptyTwoCharCodeRange = -1
281		items = self.cmap.items()
282		items.sort()
283
284		# All one-byte code values map through the subHeaderKeys table to subheader 0.
285		# Assume that all entries in the subHeaderKeys table are one-byte codes unless proven otherwise.
286		subHeaderKeys = [ 0 for x in  range(256)]
287		subHeaderList = []
288
289		lastFirstByte = -1
290		for item in items:
291			charCode = item[0]
292			firstbyte = charCode >> 8
293			secondByte = charCode & 0x00FF
294			gi = ttFont.getGlyphID(item[1])
295			if firstbyte != lastFirstByte:
296				if lastFirstByte > -1:
297					# fix GI's and iDelta of last subheader.
298					subHeader.idDelta = 0
299					if subHeader.entryCount > 0:
300						minGI = min(subHeader.glyphIndexArray) -1
301						if minGI > 0:
302							subHeader.idDelta = minGI
303							for i in range(subHeader.entryCount):
304								subHeader.glyphIndexArray[i] = subHeader.glyphIndexArray[i] - minGI
305					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
306				# init new subheader
307				subHeader = SubHeader()
308				subHeader.firstCode = secondByte
309				if (secondByte == 0) and ( gi==0 ) and (lastFirstByte > -1): # happens only when the font has no glyphs in the this charcpde range.
310					subHeader.entryCount = 0
311					subHeaderKeys[firstbyte] = kEmptyTwoCharCodeRange
312				else:
313					subHeader.entryCount = 1
314					subHeader.glyphIndexArray.append(gi)
315					subHeaderList.append(subHeader)
316					subHeaderKeys[firstbyte] = len(subHeaderList) -1
317				lastFirstByte = firstbyte
318			else:
319				assert (subHeader.entryCount != 0), "Error: we should never see another entry for an empty 2 byte charcode range."
320				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
321				for i in range(codeDiff):
322					subHeader.glyphIndexArray.append(0)
323				subHeader.glyphIndexArray.append(gi)
324				subHeader.entryCount = subHeader.entryCount + codeDiff + 1
325		# fix GI's and iDelta of last subheader.
326		subHeader.idDelta = 0
327		if subHeader.entryCount > 0:
328			minGI = min(subHeader.glyphIndexArray) -1
329			if minGI > 0:
330				subHeader.idDelta = minGI
331				for i in range(subHeader.entryCount):
332					subHeaderList[i] = subHeaderList[i] - minGI
333
334		# Now we add a last subheader for the subHeaderKeys which mapped to empty two byte charcode ranges.
335		subHeader = SubHeader()
336		subHeader.firstCode = 0
337		subHeader.entryCount = 0
338		subHeader.idDelta = 0
339		subHeader.idRangeOffset = 2
340		subHeaderList.append(subHeader)
341		emptySubheadIndex = len(subHeaderList) - 1
342		for index in range(256):
343			if subHeaderKeys[index] < 0:
344				subHeaderKeys[index] = emptySubheadIndex
345		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
346		# idRangeOffset word of this subHeader. we can safely point to the first entry in the GlyphIndexArray,
347		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
348		# charcode 0 and GID 0.
349
350		# I am not going to try and optimise by trying to overlap the glyphIDArray subranges of the subheaders -
351		# I will just write them out sequentially.
352		idRangeOffset = (len(subHeaderList)-1)*8  + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
353		for subHeader in subHeaderList[:-1]: # skip last special empty-set subheader
354			subHeader.idRangeOffset = idRangeOffset
355			idRangeOffset = (idRangeOffset -8) + subHeader.entryCount*2 # one less subheader, one more subRange.
356
357		# Now we can write out the data!
358		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
359		for subhead in 	subHeaderList[:-1]:
360			length = length + subhead.entryCount*2
361		data = struct.pack(">HHH", 2, length, self.version)
362		for index in subHeaderKeys:
363			data = data + struct.pack(">H", index*8)
364		for subhead in 	subHeaderList:
365			data = data + struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)
366		for subhead in 	subHeaderList[:-1]:
367			for gi in subhead.glyphIndexArray:
368				data = data + struct.pack(">H", gi)
369
370		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
371		return data
372
373
374
375	def toXML(self, writer, ttFont):
376		writer.begintag(self.__class__.__name__, [
377				("platformID", self.platformID),
378				("platEncID", self.platEncID),
379				("version", self.version),
380				])
381		writer.newline()
382		items = self.cmap.items()
383		items.sort()
384		for code, name in items:
385			writer.simpletag("map", code=hex(code), name=name)
386			writer.newline()
387		writer.endtag(self.__class__.__name__)
388		writer.newline()
389
390	def fromXML(self, (name, attrs, content), ttFont):
391		self.version = safeEval(attrs["version"])
392		self.cmap = {}
393		for element in content:
394			if type(element) <> TupleType:
395				continue
396			name, attrs, content = element
397			if name <> "map":
398				continue
399			self.cmap[safeEval(attrs["code"])] = attrs["name"]
400
401
# struct format of the cmap format 4 header: seven big-endian uint16 fields,
# unpacked in cmap_format_4.decompile as
#   format, length, version, segCountX2, searchRange, entrySelector, rangeShift
cmap_format_4_format = ">7H"

# The header is followed by these arrays:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
410
def splitRange(startCode, endCode, cmap):
	"""Split the character code range startCode..endCode into subranges.

	'cmap' maps every code in the range to a glyph ID. Subranges whose glyph
	IDs are consecutive are split off when that makes the cmap format 4
	subtable smaller. This is a heuristic; the result is not proven optimal.

	Returns (start, end): 'end' lists the end code of every subrange and
	'start' the start code of every subrange except the first, so
	len(start) + 1 == len(end). When no split pays off the result is
	([], [endCode]).
	"""
	if startCode == endCode:
		return [], [endCode]

	# Step 1: collect the runs of codes whose glyph IDs are consecutive.
	runs = []
	runStart = None  # first code of the run in progress, or None
	prevCode = startCode
	prevID = cmap[startCode]
	for code in range(startCode + 1, endCode + 1):
		glyphID = cmap[code]
		if glyphID == prevID + 1:
			if runStart is None:
				runStart = prevCode
		elif runStart is not None:
			runs.append((runStart, prevCode))
			runStart = None
		prevID = glyphID
		prevCode = code
	if runStart is not None:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Step 2: drop the runs that would only make the data bigger. A new
	# segment costs 8 bytes; not using a new segment costs 2 bytes per
	# character.
	kept = []
	for first, last in runs:
		if first == startCode and last == endCode:
			break  # the run is the whole range; nothing to split
		if first == startCode or last == endCode:
			minLength = 4  # the split adds one segment
		else:
			minLength = 8  # the split adds two segments
		if last - first + 1 > minLength:
			kept.append((first, last))

	if not kept:
		return [], [endCode]

	# Step 3: extend the list to cover the whole range. The pieces added
	# here and between runs are the ones with non-consecutive glyph IDs.
	if kept[0][0] != startCode:
		kept.insert(0, (startCode, kept[0][0] - 1))
	if kept[-1][1] != endCode:
		kept.append((kept[-1][1] + 1, endCode))

	segments = [kept[0]]
	for piece in kept[1:]:
		gapStart = segments[-1][1] + 1
		if gapStart != piece[0]:
			segments.append((gapStart, piece[0] - 1))
		segments.append(piece)

	# Step 4: turn the segments into the start/end lists; the first start
	# code is left out.
	start = [first for first, last in segments[1:]]
	end = [last for first, last in segments]

	assert len(start) + 1 == len(end)
	return start, end
488
489
490class cmap_format_4(CmapSubtable):
491
492	def decompile(self, data, ttFont):
493		(format, length, self.version, segCountX2,
494				searchRange, entrySelector, rangeShift) = \
495					struct.unpack(cmap_format_4_format, data[:14])
496		assert len(data) == length, "corrupt cmap table (%d, %d)" % (len(data), length)
497		segCount = segCountX2 / 2
498
499		allCodes = array.array("H")
500		allCodes.fromstring(data[14:])
501		if ttLib.endian <> "big":
502			allCodes.byteswap()
503
504		# divide the data
505		endCode = allCodes[:segCount]
506		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
507		startCode = allCodes[:segCount]
508		allCodes = allCodes[segCount:]
509		idDelta = allCodes[:segCount]
510		allCodes = allCodes[segCount:]
511		idRangeOffset = allCodes[:segCount]
512		glyphIndexArray = allCodes[segCount:]
513
514		# build 2-byte character mapping
515		cmap = {}
516		for i in range(len(startCode) - 1):	# don't do 0xffff!
517			for charCode in range(startCode[i], endCode[i] + 1):
518				rangeOffset = idRangeOffset[i]
519				if rangeOffset == 0:
520					glyphID = charCode + idDelta[i]
521				else:
522					# *someone* needs to get killed.
523					index = idRangeOffset[i] / 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
524					if glyphIndexArray[index] <> 0:  # if not missing glyph
525						glyphID = glyphIndexArray[index] + idDelta[i]
526					else:
527						glyphID = 0  # missing glyph
528				cmap[charCode] = ttFont.getGlyphName(glyphID % 0x10000)
529		self.cmap = cmap
530
531	def compile(self, ttFont):
532		from fontTools.ttLib.sfnt import maxPowerOfTwo
533
534		cmap = {}  # code:glyphID mapping
535		for code, glyphName in self.cmap.items():
536			cmap[code] = ttFont.getGlyphID(glyphName)
537		codes = cmap.keys()
538		codes.sort()
539
540		# Build startCode and endCode lists.
541		# Split the char codes in ranges of consecutive char codes, then split
542		# each range in more ranges of consecutive/not consecutive glyph IDs.
543		# See splitRange().
544		lastCode = codes[0]
545		endCode = []
546		startCode = [lastCode]
547		for charCode in codes[1:]:  # skip the first code, it's the first start code
548			if charCode == lastCode + 1:
549				lastCode = charCode
550				continue
551			start, end = splitRange(startCode[-1], lastCode, cmap)
552			startCode.extend(start)
553			endCode.extend(end)
554			startCode.append(charCode)
555			lastCode = charCode
556		endCode.append(lastCode)
557		startCode.append(0xffff)
558		endCode.append(0xffff)
559
560		# build up rest of cruft
561		idDelta = []
562		idRangeOffset = []
563		glyphIndexArray = []
564
565		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
566			indices = []
567			for charCode in range(startCode[i], endCode[i] + 1):
568				indices.append(cmap[charCode])
569			if indices == range(indices[0], indices[0] + len(indices)):
570				idDelta.append((indices[0] - startCode[i]) % 0x10000)
571				idRangeOffset.append(0)
572			else:
573				# someone *definitely* needs to get killed.
574				idDelta.append(0)
575				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
576				glyphIndexArray.extend(indices)
577		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
578		idRangeOffset.append(0)
579
580		# Insane.
581		segCount = len(endCode)
582		segCountX2 = segCount * 2
583		maxExponent = maxPowerOfTwo(segCount)
584		searchRange = 2 * (2 ** maxExponent)
585		entrySelector = maxExponent
586		rangeShift = 2 * segCount - searchRange
587
588		allCodes = array.array("H",
589				endCode + [0] + startCode + idDelta + idRangeOffset + glyphIndexArray)
590		if ttLib.endian <> "big":
591			allCodes.byteswap()
592		data = allCodes.tostring()
593		length = struct.calcsize(cmap_format_4_format) + len(data)
594		header = struct.pack(cmap_format_4_format, self.format, length, self.version,
595				segCountX2, searchRange, entrySelector, rangeShift)
596		data = header + data
597		return data
598
599	def toXML(self, writer, ttFont):
600		from fontTools.unicode import Unicode
601		codes = self.cmap.items()
602		codes.sort()
603		writer.begintag(self.__class__.__name__, [
604				("platformID", self.platformID),
605				("platEncID", self.platEncID),
606				("version", self.version),
607				])
608		writer.newline()
609
610		for code, name in codes:
611			writer.simpletag("map", code=hex(code), name=name)
612			writer.comment(Unicode[code])
613			writer.newline()
614
615		writer.endtag(self.__class__.__name__)
616		writer.newline()
617
618	def fromXML(self, (name, attrs, content), ttFont):
619		self.version = safeEval(attrs["version"])
620		self.cmap = {}
621		for element in content:
622			if type(element) <> TupleType:
623				continue
624			name, attrs, content = element
625			if name <> "map":
626				continue
627			self.cmap[safeEval(attrs["code"])] = attrs["name"]
628
629
630class cmap_format_6(CmapSubtable):
631
632	def decompile(self, data, ttFont):
633		format, length, version, firstCode, entryCount = struct.unpack(
634				">HHHHH", data[:10])
635		self.version = int(version)
636		firstCode = int(firstCode)
637		self.version = int(version)
638		data = data[10:]
639		#assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
640		glyphIndexArray = array.array("H")
641		glyphIndexArray.fromstring(data[:2 * int(entryCount)])
642		if ttLib.endian <> "big":
643			glyphIndexArray.byteswap()
644		self.cmap = cmap = {}
645		for i in range(len(glyphIndexArray)):
646			glyphID = glyphIndexArray[i]
647			glyphName = ttFont.getGlyphName(glyphID)
648			cmap[i+firstCode] = glyphName
649
650	def compile(self, ttFont):
651		codes = self.cmap.keys()
652		codes.sort()
653		assert codes == range(codes[0], codes[0] + len(codes))
654		glyphIndexArray = array.array("H", [0] * len(codes))
655		firstCode = codes[0]
656		for i in range(len(codes)):
657			code = codes[i]
658			glyphIndexArray[code-firstCode] = ttFont.getGlyphID(self.cmap[code])
659		if ttLib.endian <> "big":
660			glyphIndexArray.byteswap()
661		data = glyphIndexArray.tostring()
662		header = struct.pack(">HHHHH",
663				6, len(data) + 10, self.version, firstCode, len(self.cmap))
664		return header + data
665
666	def toXML(self, writer, ttFont):
667		codes = self.cmap.items()
668		codes.sort()
669		writer.begintag(self.__class__.__name__, [
670				("platformID", self.platformID),
671				("platEncID", self.platEncID),
672				("version", self.version),
673				])
674		writer.newline()
675
676		for code, name in codes:
677			writer.simpletag("map", code=hex(code), name=name)
678			writer.newline()
679
680		writer.endtag(self.__class__.__name__)
681		writer.newline()
682
683	def fromXML(self, (name, attrs, content), ttFont):
684		self.version = safeEval(attrs["version"])
685		self.cmap = {}
686		for element in content:
687			if type(element) <> TupleType:
688				continue
689			name, attrs, content = element
690			if name <> "map":
691				continue
692			self.cmap[safeEval(attrs["code"])] = attrs["name"]
693
694
695class cmap_format_12(CmapSubtable):
696
697	def decompile(self, data, ttFont):
698		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
699		data = data[16:]
700		assert len(data) == nGroups*12 == (length -16)
701		self.cmap = cmap = {}
702		for i in range(nGroups):
703			startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[:12] )
704			data = data[12:]
705			while startCharCode <= endCharCode:
706				glyphName = ttFont.getGlyphName(glyphID)
707				cmap[startCharCode] = glyphName
708				glyphID = glyphID +1
709				startCharCode = startCharCode + 1
710		self.format = format
711		self.reserved = reserved
712		self.length = length
713		self.language = language
714		self.nGroups = nGroups
715
716	def compile(self, ttFont):
717		cmap = {}  # code:glyphID mapping
718		for code, glyphName in self.cmap.items():
719			cmap[code] = ttFont.getGlyphID(glyphName)
720
721		charCodes = self.cmap.keys()
722		charCodes.sort()
723		startCharCode = charCodes[0]
724		startGlyphID = cmap[startCharCode]
725		nextGlyphID = startGlyphID + 1
726		nGroups = 1
727		data = ""
728		for charCode in charCodes:
729			glyphID = cmap[charCode]
730			if glyphID != nextGlyphID:
731				endCharCode =  charCode -1
732				data = data + struct.pack(">LLL", startCharCode, endCharCode, startGlyphID)
733				startGlyphID = glyphID
734				startCharCode = charCode
735				nGroups = nGroups + 1
736			nextGlyphID = glyphID +1
737
738		data = struct.pack(">HHLLL", self.format, 0 , len(data), self.language, nGroups) + data
739		return data
740
741	def toXML(self, writer, ttFont):
742		writer.begintag(self.__class__.__name__, [
743				("platformID", self.platformID),
744				("platEncID", self.platEncID),
745				("format", self.format),
746				("reserved", self.reserved),
747				("length", self.length),
748				("language", self.language),
749				("nGroups", self.nGroups),
750				])
751		writer.newline()
752		items = self.cmap.items()
753		items.sort()
754		for code, name in items:
755			writer.simpletag("map", code=hex(code), name=name)
756			writer.newline()
757		writer.endtag(self.__class__.__name__)
758		writer.newline()
759
760	def fromXML(self, (name, attrs, content), ttFont):
761		self.format = safeEval(attrs["format"])
762		self.reserved = safeEval(attrs["reserved"])
763		self.length = safeEval(attrs["length"])
764		self.language = safeEval(attrs["language"])
765		self.nGroups = safeEval(attrs["nGroups"])
766		self.cmap = {}
767		for element in content:
768			if type(element) <> TupleType:
769				continue
770			name, attrs, content = element
771			if name <> "map":
772				continue
773			self.cmap[safeEval(attrs["code"])] = attrs["name"]
774
775
class cmap_format_unknown(CmapSubtable):

	"""Fallback for subtable formats without a dedicated class: the raw
	bytes are kept as they are."""

	def decompile(self, data, ttFont):
		"""Store the raw subtable bytes; 'ttFont' is not used."""
		self.data = data

	def compile(self, ttFont):
		"""Return the stored bytes unchanged; 'ttFont' is not used."""
		return self.data
783
784
# Maps a cmap subtable format number to the class that implements it.
# table__c_m_a_p.decompile and table__c_m_a_p.fromXML fall back to
# cmap_format_unknown for any format that has no entry here.
cmap_classes = {
		0: cmap_format_0,
		2: cmap_format_2,
		4: cmap_format_4,
		6: cmap_format_6,
		12: cmap_format_12,
		}
792
793
794