ttLib/tables/_c_m_a_p.py

import DefaultTable
import struct
import array
from fontTools import ttLib
from fontTools.misc.textTools import safeEval, readHex
from types import TupleType


class table__c_m_a_p(DefaultTable.DefaultTable):

	def getcmap(self, platformID, platEncID):
		for subtable in self.tables:
			if (subtable.platformID == platformID and
					subtable.platEncID == platEncID):
				return subtable
		return None # not found

	def decompile(self, data, ttFont):
		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
		self.tableVersion = int(tableVersion)
		self.tables = tables = []
		for i in range(numSubTables):
			platformID, platEncID, offset = struct.unpack(
					">HHl", data[4+i*8:4+(i+1)*8])
			platformID, platEncID = int(platformID), int(platEncID)
			format, length = struct.unpack(">HH", data[offset:offset+4])
			if (format < 8) and not length:
				continue  # bogus cmap subtable?
			if format in [8,10,12]:
				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
			if not cmap_classes.has_key(format):
				table = cmap_format_unknown(format)
			else:
				table = cmap_classes[format](format)
			table.platformID = platformID
			table.platEncID = platEncID
			table.decompile(data[offset:offset+int(length)], ttFont)
			tables.append(table)

	def compile(self, ttFont):
		self.tables.sort()    # sort according to the spec; see CmapSubtable.__cmp__()
		numSubTables = len(self.tables)
		totalOffset = 4 + 8 * numSubTables
		data = struct.pack(">HH", self.tableVersion, numSubTables)
		tableData = ""
		done = {}  # remember the data so we can reuse the "pointers"
		for table in self.tables:
			chunk = table.compile(ttFont)
			if done.has_key(chunk):
				offset = done[chunk]
			else:
				offset = done[chunk] = totalOffset + len(tableData)
				tableData = tableData + chunk
			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
		return data + tableData

	def toXML(self, writer, ttFont):
		writer.simpletag("tableVersion", version=self.tableVersion)
		writer.newline()
		for table in self.tables:
			table.toXML(writer, ttFont)

	def fromXML(self, (name, attrs, content), ttFont):
		if name == "tableVersion":
			self.tableVersion = safeEval(attrs["version"])
			return
		if name[:12] <> "cmap_format_":
			return
		if not hasattr(self, "tables"):
			self.tables = []
		format = safeEval(name[12])
		if not cmap_classes.has_key(format):
			table = cmap_format_unknown(format)
		else:
			table = cmap_classes[format](format)
		table.platformID = safeEval(attrs["platformID"])
		table.platEncID = safeEval(attrs["platEncID"])
		table.fromXML((name, attrs, content), ttFont)
		self.tables.append(table)


class CmapSubtable:

	def __init__(self, format):
		self.format = format

	def toXML(self, writer, ttFont):
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		writer.dumphex(self.compile(ttFont))
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, (name, attrs, content), ttFont):
		self.decompile(readHex(content), ttFont)

	def __cmp__(self, other):
		# implemented so that list.sort() sorts according to the cmap spec.
		selfTuple = (
					self.platformID,
					self.platEncID,
					self.version,
					self.__dict__)
		otherTuple = (
					other.platformID,
					other.platEncID,
					other.version,
					other.__dict__)
		return cmp(selfTuple, otherTuple)


class cmap_format_0(CmapSubtable):

	def decompile(self, data, ttFont):
		format, length, version = struct.unpack(">HHH", data[:6])
		self.version = int(version)
		assert len(data) == 262 == length
		glyphIdArray = array.array("B")
		glyphIdArray.fromstring(data[6:])
		self.cmap = cmap = {}
		for charCode in range(len(glyphIdArray)):
			cmap[charCode] = ttFont.getGlyphName(glyphIdArray[charCode])

	def compile(self, ttFont):
		charCodes = self.cmap.keys()
		charCodes.sort()
		assert charCodes == range(256)  # charCodes[charCode] == charCode
		for charCode in charCodes:
			# reusing the charCodes list!
			charCodes[charCode] = ttFont.getGlyphID(self.cmap[charCode])
		glyphIdArray = array.array("B", charCodes)
		data = struct.pack(">HHH", 0, 262, self.version) + glyphIdArray.tostring()
		assert len(data) == 262
		return data

	def toXML(self, writer, ttFont):
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("version", self.version),
				])
		writer.newline()
		items = self.cmap.items()
		items.sort()
		for code, name in items:
			writer.simpletag("map", code=hex(code), name=name)
			writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, (name, attrs, content), ttFont):
		self.version = safeEval(attrs["version"])
		self.cmap = {}
		for element in content:
			if type(element) <> TupleType:
				continue
			name, attrs, content = element
			if name <> "map":
				continue
			self.cmap[safeEval(attrs["code"])] = attrs["name"]


# struct format of one 8-byte format 2 subheader, big-endian:
# firstCode (H), entryCount (H), idDelta (h, signed), idRangeOffset (H).
subHeaderFormat = ">HHhH"
class SubHeader:
	"""Plain record holding the fields of one subheader and its glyph indices."""
	def __init__(self):
		# The four scalar fields stay None until they are filled in.
		self.firstCode = self.entryCount = None
		self.idDelta = self.idRangeOffset = None
		# A fresh list per instance.
		self.glyphIndexArray = list()

class cmap_format_2(CmapSubtable):

	def decompile(self, data, ttFont):
		format, length, version = struct.unpack(">HHH", data[:6])
		self.version = int(version)
		data = data[6:]
		subHeaderKeys = []
		maxSubHeaderindex = 0

		# get the key array, and determine the number of subHeaders.
		for i in range(256):
			key = struct.unpack(">H", data[:2])[0]
			value = int(key)/8
			if value > maxSubHeaderindex:
				maxSubHeaderindex  = value
			data = data[2:]
			subHeaderKeys.append(value)

		#Load subHeaders
		subHeaderList = []
		for i in range(maxSubHeaderindex + 1):
			subHeader = SubHeader()
			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[:8])
			data = data[8:]
			giData = data[subHeader.idRangeOffset-2:]
			for j in range(subHeader.entryCount):
				gi = struct.unpack(">H", giData[:2])[0]
				giData = giData[2:]
				subHeader.glyphIndexArray.append(int(gi))

			subHeaderList.append(subHeader)

		# How this gets processed.
		# Charcodes may be one or two bytes.
		# The first byte of a charcode is mapped through the  subHeaderKeys, to select
		# a subHeader. For any subheader but 0, the next byte is then mapped through the
		# selected subheader. If subheader Index 0 is selected, then the byte itself is
		# mapped through the subheader, and there is no second byte.
		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
		#
		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
		# The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray
		# referenced by another subheader.
		# The only subheader that will be referenced by more than one first-byte value is the subheader
		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
		# A subheader specifies a subrange within (0...256) by the
		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
		# (e.g. glyph not in font).
		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
		# Example for Logocut-Medium
		# first byte of charcode = 129; selects subheader 1.
		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
		# second byte of charCode = 66
		# the index offset = 66-64 = 2.
		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
		# [glyphIndexArray index], [subrange array index] = glyphIndex
		# [256], [0]=1 	from charcode [129, 64]
		# [257], [1]=2  	from charcode [129, 65]
		# [258], [2]=3  	from charcode [129, 66]
		# [259], [3]=4  	from charcode [129, 67]
		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero, add it to the glyphInex to get the final glyphIndex
		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!
		# Has anyone ever really tried to overlap the subHeader subranges in the glyphIndexArray? I doubt it!

		self.data = ""
		self.cmap = {}
		for firstByte in range(256):
			subHeadindex = subHeaderKeys[firstByte]
			subHeader = subHeaderList[subHeadindex]
			if subHeadindex == 0:
				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
					gi = 0
				else:
					charCode = firstByte
					offsetIndex = firstByte - subHeader.firstCode
					gi = subHeader.glyphIndexArray[offsetIndex]
					if gi != 0:
						gi = gi + subHeader.idDelta
				gName = ttFont.getGlyphName(gi)
				self.cmap[charCode] = gName
			else:
				if subHeader.entryCount:
					for offsetIndex in range(subHeader.entryCount):
						charCode = firstByte * 256 + offsetIndex + subHeader.firstCode
						gi = subHeader.glyphIndexArray[offsetIndex]
						if gi != 0:
							gi = gi + subHeader.idDelta
						gName = ttFont.getGlyphName(gi)
						self.cmap[charCode] = gName
				else:
					# Is a subHead that maps to .notdef. We do need to record it, so we can later
					# know that this firstByte value is the initial byte of a two byte charcode,
					# as opposed to a sing byte charcode.
					charCode = firstByte * 256
					gName = ttFont.getGlyphName(0)
					self.cmap[charCode] = gName


	def compile(self, ttFont):
		kEmptyTwoCharCodeRange = -1
		items = self.cmap.items()
		items.sort()

		# All one-byte code values map through the subHeaderKeys table to subheader 0.
		# Assume that all entries in the subHeaderKeys table are one-byte codes unless proven otherwise.
		subHeaderKeys = [ 0 for x in  range(256)]
		subHeaderList = []

		lastFirstByte = -1
		for item in items:
			charCode = item[0]
			firstbyte = charCode >> 8
			secondByte = charCode & 0x00FF
			gi = ttFont.getGlyphID(item[1])
			if firstbyte != lastFirstByte:
				if lastFirstByte > -1:
					# fix GI's and iDelta of last subheader.
					subHeader.idDelta = 0
					if subHeader.entryCount > 0:
						minGI = min(subHeader.glyphIndexArray) -1
						if minGI > 0:
							subHeader.idDelta = minGI
							for i in range(subHeader.entryCount):
								subHeader.glyphIndexArray[i] = subHeader.glyphIndexArray[i] - minGI
					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
				# init new subheader
				subHeader = SubHeader()
				subHeader.firstCode = secondByte
				if (secondByte == 0) and ( gi==0 ) and (lastFirstByte > -1): # happens only when the font has no glyphs in the this charcpde range.
					subHeader.entryCount = 0
					subHeaderKeys[firstbyte] = kEmptyTwoCharCodeRange
				else:
					subHeader.entryCount = 1
					subHeader.glyphIndexArray.append(gi)
					subHeaderList.append(subHeader)
					subHeaderKeys[firstbyte] = len(subHeaderList) -1
				lastFirstByte = firstbyte
			else:
				assert (subHeader.entryCount != 0), "Error: we should never see another entry for an empty 2 byte charcode range."
				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
				for i in range(codeDiff):
					subHeader.glyphIndexArray.append(0)
				subHeader.glyphIndexArray.append(gi)
				subHeader.entryCount = subHeader.entryCount + codeDiff + 1
		# fix GI's and iDelta of last subheader.
		subHeader.idDelta = 0
		if subHeader.entryCount > 0:
			minGI = min(subHeader.glyphIndexArray) -1
			if minGI > 0:
				subHeader.idDelta = minGI
				for i in range(subHeader.entryCount):
					subHeaderList[i] = subHeaderList[i] - minGI

		# Now we add a last subheader for the subHeaderKeys which mapped to empty two byte charcode ranges.
		subHeader = SubHeader()
		subHeader.firstCode = 0
		subHeader.entryCount = 0
		subHeader.idDelta = 0
		subHeader.idRangeOffset = 2
		subHeaderList.append(subHeader)
		emptySubheadIndex = len(subHeaderList) - 1
		for index in range(256):
			if subHeaderKeys[index] < 0:
				subHeaderKeys[index] = emptySubheadIndex
		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
		# idRangeOffset word of this subHeader. we can safely point to the first entry in the GlyphIndexArray,
		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
		# charcode 0 and GID 0.

		# I am not going to try and optimise by trying to overlap the glyphIDArray subranges of the subheaders -
		# I will just write them out sequentially.
		idRangeOffset = (len(subHeaderList)-1)*8  + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
		for subHeader in subHeaderList[:-1]: # skip last special empty-set subheader
			subHeader.idRangeOffset = idRangeOffset
			idRangeOffset = (idRangeOffset -8) + subHeader.entryCount*2 # one less subheader, one more subRange.

		# Now we can write out the data!
		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
		for subhead in 	subHeaderList[:-1]:
			length = length + subhead.entryCount*2
		data = struct.pack(">HHH", 2, length, self.version)
		for index in subHeaderKeys:
			data = data + struct.pack(">H", index*8)
		for subhead in 	subHeaderList:
			data = data + struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)
		for subhead in 	subHeaderList[:-1]:
			for gi in subhead.glyphIndexArray:
				data = data + struct.pack(">H", gi)

		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
		return data


	def toXML(self, writer, ttFont):
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("version", self.version),
				])
		writer.newline()
		items = self.cmap.items()
		items.sort()
		for code, name in items:
			writer.simpletag("map", code=hex(code), name=name)
			writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, (name, attrs, content), ttFont):
		self.version = safeEval(attrs["version"])
		self.cmap = {}
		for element in content:
			if type(element) <> TupleType:
				continue
			name, attrs, content = element
			if name <> "map":
				continue
			self.cmap[safeEval(attrs["code"])] = attrs["name"]


# struct format of the fixed 14-byte format 4 header: format, length,
# version, segCountX2, searchRange, entrySelector, rangeShift.
cmap_format_4_format = ">7H"

# The arrays below follow the header, in this order:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array

def splitRange(startCode, endCode, cmap):
	"""Split the code range startCode..endCode into cmap4 segments.

	'cmap' maps every character code in the range to a glyph ID. Runs of
	codes whose glyph IDs are consecutive are split off as separate
	segments when that is expected to make the subtable smaller.

	Returns (start, end): 'end' lists the last code of every resulting
	segment, 'start' lists the first code of every segment except the
	first one, so len(start) + 1 == len(end). A range that is not split
	yields ([], [endCode]).
	"""
	if startCode == endCode:
		return [], [endCode]

	# Pass 1: collect the maximal runs with consecutive glyph IDs.
	runs = []
	runStart = None  # first code of the run in progress, None outside a run
	prevCode = startCode
	prevGlyph = cmap[startCode]
	for code in range(startCode + 1, endCode + 1):
		glyph = cmap[code]
		if glyph == prevGlyph + 1:
			if runStart is None:
				runStart = prevCode
		elif runStart is not None:
			runs.append((runStart, prevCode))
			runStart = None
		prevCode = code
		prevGlyph = glyph
	if runStart is not None:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Pass 2: keep only the runs that pay for themselves. A new segment
	# costs 8 bytes; leaving a code in a non-consecutive segment costs
	# 2 bytes per character.
	kept = []
	for first, last in runs:
		touchesStart = first == startCode
		touchesEnd = last == endCode
		if touchesStart and touchesEnd:
			break  # the run is the whole range: nothing to split
		if touchesStart or touchesEnd:
			minimumSize = 4  # splitting adds one segment
		else:
			minimumSize = 8  # splitting adds two segments
		if last - first + 1 > minimumSize:
			kept.append((first, last))

	if not kept:
		return [], [endCode]

	# Pass 3: cover the whole range. Add the leading and trailing
	# leftovers, then the gaps between kept runs; those are the segments
	# whose glyph IDs are not consecutive.
	if kept[0][0] != startCode:
		kept.insert(0, (startCode, kept[0][0] - 1))
	if kept[-1][1] != endCode:
		kept.append((kept[-1][1] + 1, endCode))

	segments = [kept[0]]
	for segment in kept[1:]:
		gapStart = segments[-1][1] + 1
		if gapStart != segment[0]:
			segments.append((gapStart, segment[0] - 1))
		segments.append(segment)

	# The caller already knows the first start code, so it is left out.
	start = [first for first, last in segments[1:]]
	end = [last for first, last in segments]

	assert len(start) + 1 == len(end)
	return start, end


class cmap_format_4(CmapSubtable):

	def decompile(self, data, ttFont):
		(format, length, self.version, segCountX2,
				searchRange, entrySelector, rangeShift) = \
					struct.unpack(cmap_format_4_format, data[:14])
		assert len(data) == length, "corrupt cmap table (%d, %d)" % (len(data), length)
		segCount = segCountX2 / 2

		allCodes = array.array("H")
		allCodes.fromstring(data[14:])
		if ttLib.endian <> "big":
			allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]

		# build 2-byte character mapping
		cmap = {}
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			for charCode in range(startCode[i], endCode[i] + 1):
				rangeOffset = idRangeOffset[i]
				if rangeOffset == 0:
					glyphID = charCode + idDelta[i]
				else:
					# *someone* needs to get killed.
					index = idRangeOffset[i] / 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
					if glyphIndexArray[index] <> 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + idDelta[i]
					else:
						glyphID = 0  # missing glyph
				cmap[charCode] = ttFont.getGlyphName(glyphID % 0x10000)
		self.cmap = cmap

	def compile(self, ttFont):
		from fontTools.ttLib.sfnt import maxPowerOfTwo

		cmap = {}  # code:glyphID mapping
		for code, glyphName in self.cmap.items():
			cmap[code] = ttFont.getGlyphID(glyphName)
		codes = cmap.keys()
		codes.sort()

		# Build startCode and endCode lists.
		# Split the char codes in ranges of consecutive char codes, then split
		# each range in more ranges of consecutive/not consecutive glyph IDs.
		# See splitRange().
		lastCode = codes[0]
		endCode = []
		startCode = [lastCode]
		for charCode in codes[1:]:  # skip the first code, it's the first start code
			if charCode == lastCode + 1:
				lastCode = charCode
				continue
			start, end = splitRange(startCode[-1], lastCode, cmap)
			startCode.extend(start)
			endCode.extend(end)
			startCode.append(charCode)
			lastCode = charCode
		endCode.append(lastCode)
		startCode.append(0xffff)
		endCode.append(0xffff)

		# build up rest of cruft
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []

		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if indices == range(indices[0], indices[0] + len(indices)):
				idDelta.append((indices[0] - startCode[i]) % 0x10000)
				idRangeOffset.append(0)
			else:
				# someone *definitely* needs to get killed.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Insane.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		maxExponent = maxPowerOfTwo(segCount)
		searchRange = 2 * (2 ** maxExponent)
		entrySelector = maxExponent
		rangeShift = 2 * segCount - searchRange

		allCodes = array.array("H",
				endCode + [0] + startCode + idDelta + idRangeOffset + glyphIndexArray)
		if ttLib.endian <> "big":
			allCodes.byteswap()
		data = allCodes.tostring()
		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.version,
				segCountX2, searchRange, entrySelector, rangeShift)
		data = header + data
		return data

	def toXML(self, writer, ttFont):
		from fontTools.unicode import Unicode
		codes = self.cmap.items()
		codes.sort()
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("version", self.version),
				])
		writer.newline()

		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			writer.comment(Unicode[code])
			writer.newline()

		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, (name, attrs, content), ttFont):
		self.version = safeEval(attrs["version"])
		self.cmap = {}
		for element in content:
			if type(element) <> TupleType:
				continue
			name, attrs, content = element
			if name <> "map":
				continue
			self.cmap[safeEval(attrs["code"])] = attrs["name"]


class cmap_format_6(CmapSubtable):

	def decompile(self, data, ttFont):
		format, length, version, firstCode, entryCount = struct.unpack(
				">HHHHH", data[:10])
		self.version = int(version)
		firstCode = int(firstCode)
		self.version = int(version)
		data = data[10:]
		#assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
		glyphIndexArray = array.array("H")
		glyphIndexArray.fromstring(data[:2 * int(entryCount)])
		if ttLib.endian <> "big":
			glyphIndexArray.byteswap()
		self.cmap = cmap = {}
		for i in range(len(glyphIndexArray)):
			glyphID = glyphIndexArray[i]
			glyphName = ttFont.getGlyphName(glyphID)
			cmap[i+firstCode] = glyphName

	def compile(self, ttFont):
		codes = self.cmap.keys()
		codes.sort()
		assert codes == range(codes[0], codes[0] + len(codes))
		glyphIndexArray = array.array("H", [0] * len(codes))
		firstCode = codes[0]
		for i in range(len(codes)):
			code = codes[i]
			glyphIndexArray[code-firstCode] = ttFont.getGlyphID(self.cmap[code])
		if ttLib.endian <> "big":
			glyphIndexArray.byteswap()
		data = glyphIndexArray.tostring()
		header = struct.pack(">HHHHH",
				6, len(data) + 10, self.version, firstCode, len(self.cmap))
		return header + data

	def toXML(self, writer, ttFont):
		codes = self.cmap.items()
		codes.sort()
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("version", self.version),
				])
		writer.newline()

		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			writer.newline()

		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, (name, attrs, content), ttFont):
		self.version = safeEval(attrs["version"])
		self.cmap = {}
		for element in content:
			if type(element) <> TupleType:
				continue
			name, attrs, content = element
			if name <> "map":
				continue
			self.cmap[safeEval(attrs["code"])] = attrs["name"]


class cmap_format_12(CmapSubtable):

	def decompile(self, data, ttFont):
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		data = data[16:]
		assert len(data) == nGroups*12 == (length -16)
		self.cmap = cmap = {}
		for i in range(nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[:12] )
			data = data[12:]
			while startCharCode <= endCharCode:
				glyphName = ttFont.getGlyphName(glyphID)
				cmap[startCharCode] = glyphName
				glyphID = glyphID +1
				startCharCode = startCharCode + 1
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups

	def compile(self, ttFont):
		cmap = {}  # code:glyphID mapping
		for code, glyphName in self.cmap.items():
			cmap[code] = ttFont.getGlyphID(glyphName)

		charCodes = self.cmap.keys()
		charCodes.sort()
		startCharCode = charCodes[0]
		startGlyphID = cmap[startCharCode]
		nextGlyphID = startGlyphID + 1
		nGroups = 1
		data = ""
		for charCode in charCodes:
			glyphID = cmap[charCode]
			if glyphID != nextGlyphID:
				endCharCode =  charCode -1
				data = data + struct.pack(">LLL", startCharCode, endCharCode, startGlyphID)
				startGlyphID = glyphID
				startCharCode = charCode
				nGroups = nGroups + 1
			nextGlyphID = glyphID +1

		data = struct.pack(">HHLLL", self.format, 0 , len(data), self.language, nGroups) + data
		return data

	def toXML(self, writer, ttFont):
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		items = self.cmap.items()
		items.sort()
		for code, name in items:
			writer.simpletag("map", code=hex(code), name=name)
			writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, (name, attrs, content), ttFont):
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		self.cmap = {}
		for element in content:
			if type(element) <> TupleType:
				continue
			name, attrs, content = element
			if name <> "map":
				continue
			self.cmap[safeEval(attrs["code"])] = attrs["name"]


class cmap_format_unknown(CmapSubtable):
	"""Subtable of a format without a dedicated class: the raw data is
	stored and written back unchanged."""

	def decompile(self, data, ttFont):
		"""Keep the raw subtable data as self.data."""
		self.data = data

	def compile(self, ttFont):
		"""Return the stored raw data."""
		return self.data


# Maps a subtable format number to the class that implements it.
cmap_classes = {
		0: cmap_format_0,
		2: cmap_format_2,
		4: cmap_format_4,
		6: cmap_format_6,
		12: cmap_format_12,
		}