ttLib/tables/_c_m_a_p.py

cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)import DefaultTable
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)import struct
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)import string
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)import array
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)from fontTools import ttLib
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)from fontTools.misc.textTools import safeEval, readHex
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)from types import TupleType
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)class table__c_m_a_p(DefaultTable.DefaultTable):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)	def getcmap(self, platformID, platEncID):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		for subtable in self.tables:
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			if (subtable.platformID == platformID and
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)					subtable.platEncID == platEncID):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)				return subtable
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		return None # not found
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)	def decompile(self, data, ttFont):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		self.tableVersion = int(tableVersion)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		self.tables = tables = []
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		for i in range(numSubTables):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			platformID, platEncID, offset = struct.unpack(
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)					">HHl", data[4+i*8:4+(i+1)*8])
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			platformID, platEncID = int(platformID), int(platEncID)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			format, length = struct.unpack(">HH", data[offset:offset+4])
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			if not length:
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)				continue  # bogus cmap subtable?
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			if not cmap_classes.has_key(format):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)				table = cmap_format_unknown(format)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			else:
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)				table = cmap_classes[format](format)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			table.platformID = platformID
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			table.platEncID = platEncID
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			table.decompile(data[offset:offset+int(length)], ttFont)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			tables.append(table)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)	def compile(self, ttFont):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		self.tables.sort()    # sort according to the spec; see CmapSubtable.__cmp__()
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		numSubTables = len(self.tables)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		totalOffset = 4 + 8 * numSubTables
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		data = struct.pack(">HH", self.tableVersion, numSubTables)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		tableData = ""
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		done = {}  # remember the data so we can reuse the "pointers"
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		for table in self.tables:
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			chunk = table.compile(ttFont)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			if done.has_key(chunk):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)				offset = done[chunk]
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			else:
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)				offset = done[chunk] = totalOffset + len(tableData)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)				tableData = tableData + chunk
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		return data + tableData
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)	def toXML(self, writer, ttFont):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		writer.simpletag("tableVersion", version=self.tableVersion)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		writer.newline()
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		for table in self.tables:
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			table.toXML(writer, ttFont)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)	def fromXML(self, (name, attrs, content), ttFont):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		if name == "tableVersion":
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			self.tableVersion = safeEval(attrs["version"])
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			return
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		if name[:12] <> "cmap_format_":
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			return
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		if not hasattr(self, "tables"):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			self.tables = []
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		format = safeEval(name[12])
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		if not cmap_classes.has_key(format):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			table = cmap_format_unknown(format)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		else:
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)			table = cmap_classes[format](format)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		table.platformID = safeEval(attrs["platformID"])
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		table.platEncID = safeEval(attrs["platEncID"])
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		table.fromXML((name, attrs, content), ttFont)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		self.tables.append(table)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)class CmapSubtable:
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)	def __init__(self, format):
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)		self.format = format
cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
	def toXML(self, writer, ttFont):
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		writer.dumphex(self.compile(ttFont))
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, (name, attrs, content), ttFont):
		self.decompile(readHex(content), ttFont)

	def __cmp__(self, other):
		# implemented so that list.sort() sorts according to the cmap spec.
		selfTuple = (
					self.platformID,
					self.platEncID,
					self.version,
					self.__dict__)
		otherTuple = (
					other.platformID,
					other.platEncID,
					other.version,
					other.__dict__)
		return cmp(selfTuple, otherTuple)


class cmap_format_0(CmapSubtable):

	def decompile(self, data, ttFont):
		format, length, version = struct.unpack(">HHH", data[:6])
		self.version = int(version)
		assert len(data) == 262 == length
		glyphIdArray = array.array("B")
		glyphIdArray.fromstring(data[6:])
		self.cmap = cmap = {}
		for charCode in range(len(glyphIdArray)):
			cmap[charCode] = ttFont.getGlyphName(glyphIdArray[charCode])

	def compile(self, ttFont):
		charCodes = self.cmap.keys()
		charCodes.sort()
		assert charCodes == range(256)  # charCodes[charCode] == charCode
		for charCode in charCodes:
			# reusing the charCodes list!
			charCodes[charCode] = ttFont.getGlyphID(self.cmap[charCode])
		glyphIdArray = array.array("B", charCodes)
		data = struct.pack(">HHH", 0, 262, self.version) + glyphIdArray.tostring()
		assert len(data) == 262
		return data

	def toXML(self, writer, ttFont):
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("version", self.version),
				])
		writer.newline()
		items = self.cmap.items()
		items.sort()
		for code, name in items:
			writer.simpletag("map", code=hex(code), name=name)
			writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, (name, attrs, content), ttFont):
		self.version = safeEval(attrs["version"])
		self.cmap = {}
		for element in content:
			if type(element) <> TupleType:
				continue
			name, attrs, content = element
			if name <> "map":
				continue
			self.cmap[safeEval(attrs["code"])] = attrs["name"]


class cmap_format_2(CmapSubtable):
	"""Format 2 subtable, kept as raw data; only the version field is read."""

	def decompile(self, data, ttFont):
		"""Record the header's third uint16 as the version and keep 'data' as is."""
		header = struct.unpack(">HHH", data[:6])
		self.version = int(header[2])
		self.data = data

	def compile(self, ttFont):
		"""Return the data stored by decompile(), unchanged."""
		return self.data


# struct format of the fixed-size format 4 header: seven big-endian uint16
# values, unpacked by cmap_format_4.decompile() as format, length, version,
# segCountX2, searchRange, entrySelector and rangeShift.
cmap_format_4_format = ">7H"

# The header is followed by these uint16 arrays, in this order:
#uint16	endCode[segCount]			# Ending character code for each segment, last = 0xFFFF.
#uint16	reservedPad					# This value should be zero
#uint16	startCode[segCount]			# Starting character code for each segment
#uint16	idDelta[segCount]			# Delta for all character codes in segment
#uint16	idRangeOffset[segCount]		# Offset in bytes to glyph indexArray, or 0
#uint16	glyphIndexArray[variable]	# Glyph index array

def splitRange(startCode, endCode, cmap):
	"""Split a contiguous run of character codes into format 4 segments.

	startCode, endCode -- first and last character code of the run
		(inclusive); unless they are equal, every code in the run must be
		a key of cmap
	cmap -- dict mapping character code to glyph ID

	Subranges whose glyph IDs are consecutive are turned into segments of
	their own when they are long enough to pay for the extra segment(s);
	the codes between them become segments too.

	Returns (start, end). 'end' holds the last code of every resulting
	segment; 'start' holds the first code of every segment except the
	first one (which is always startCode), so len(start) + 1 == len(end).
	"""
	if startCode == endCode:
		return [], [endCode]

	# Gather the subranges in which the glyph IDs are consecutive.
	subRanges = []
	orderedBegin = None  # first code of the consecutive run in progress, if any
	lastCode = startCode
	lastID = cmap[startCode]
	for code in range(startCode + 1, endCode + 1):
		glyphID = cmap[code]
		if glyphID - 1 == lastID:
			if orderedBegin is None:
				orderedBegin = lastCode
		elif orderedBegin is not None:
			subRanges.append((orderedBegin, lastCode))
			orderedBegin = None
		lastID = glyphID
		lastCode = code
	if orderedBegin is not None:
		subRanges.append((orderedBegin, lastCode))
	assert lastCode == endCode

	# Drop the subranges that are too short to be worth a split: a segment
	# takes four uint16 entries, a code left in the glyph index array one.
	worthwhile = []
	for b, e in subRanges:
		if b == startCode and e == endCode:
			break  # the whole range, we're fine
		if b == startCode or e == endCode:
			threshold = 4  # split costs one more segment
		else:
			threshold = 8  # split costs two more segments
		if (e - b + 1) > threshold:
			worthwhile.append((b, e))
	subRanges = worthwhile

	if not subRanges:
		return [], [endCode]

	# Make the list cover the run from its first to its last code...
	if subRanges[0][0] != startCode:
		subRanges.insert(0, (startCode, subRanges[0][0] - 1))
	if subRanges[-1][1] != endCode:
		subRanges.append((subRanges[-1][1] + 1, endCode))
	# ...and fill the holes in between; those are the stretches in which
	# the glyph IDs are not consecutive.
	i = 1
	while i < len(subRanges):
		if subRanges[i-1][1] + 1 != subRanges[i][0]:
			subRanges.insert(i, (subRanges[i-1][1] + 1, subRanges[i][0] - 1))
			i = i + 1  # step over the segment just inserted
		i = i + 1

	# The first start code is left out: it is startCode itself.
	start = [b for b, e in subRanges[1:]]
	end = [e for b, e in subRanges]

	assert len(start) + 1 == len(end)
	return start, end


class cmap_format_4(CmapSubtable):

	def decompile(self, data, ttFont):
		(format, length, self.version, segCountX2,
				searchRange, entrySelector, rangeShift) = \
					struct.unpack(cmap_format_4_format, data[:14])
		assert len(data) == length, "corrupt cmap table (%d, %d)" % (len(data), length)
		segCount = segCountX2 / 2

		allCodes = array.array("H")
		allCodes.fromstring(data[14:])
		if ttLib.endian <> "big":
			allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]

		#print ">>>> segCount", segCount, "len(glyphIndexArray):", len(glyphIndexArray), "len(data)", len(data)

		# build 2-byte character mapping
		cmap = {}
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			for charCode in range(startCode[i], endCode[i] + 1):
				rangeOffset = idRangeOffset[i]
				if rangeOffset == 0:
					glyphID = charCode + idDelta[i]
				else:
					# *someone* needs to get killed.
					index = idRangeOffset[i] / 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
					if glyphIndexArray[index] <> 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + idDelta[i]
					else:
						glyphID = 0  # missing glyph
				cmap[charCode] = ttFont.getGlyphName(glyphID % 0x10000)
		self.cmap = cmap

	def compile(self, ttFont):
		from fontTools.ttLib.sfnt import maxPowerOfTwo

		cmap = {}  # code:glyphID mapping
		for code, glyphName in self.cmap.items():
			cmap[code] = ttFont.getGlyphID(glyphName)
		codes = cmap.keys()
		codes.sort()

		# build startCode and endCode lists
		lastCode = codes[0]
		endCode = []
		startCode = [lastCode]
		for charCode in codes[1:]:  # skip the first code, it's the first start code
			if charCode == lastCode + 1:
				lastCode = charCode
				continue
			start, end = splitRange(startCode[-1], lastCode, cmap)
			startCode.extend(start)
			endCode.extend(end)
			startCode.append(charCode)
			lastCode = charCode
		endCode.append(lastCode)
		startCode.append(0xffff)
		endCode.append(0xffff)

		# build up rest of cruft
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []

		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if indices == range(indices[0], indices[0] + len(indices)):
				idDelta.append((indices[0] - startCode[i]) % 0x10000)
				idRangeOffset.append(0)
			else:
				# someone *definitely* needs to get killed.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Insane.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		maxExponent = maxPowerOfTwo(segCount)
		searchRange = 2 * (2 ** maxExponent)
		entrySelector = maxExponent
		rangeShift = 2 * segCount - searchRange

		allCodes = array.array("H",
				endCode + [0] + startCode + idDelta + idRangeOffset + glyphIndexArray)
		if ttLib.endian <> "big":
			allCodes.byteswap()
		data = allCodes.tostring()
		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.version,
				segCountX2, searchRange, entrySelector, rangeShift)
		data = header + data

		#print "<<<< segCount", segCount, "len(glyphIndexArray):", len(glyphIndexArray), "len(data)", len(data)

		return data

	def toXML(self, writer, ttFont):
		from fontTools.unicode import Unicode
		codes = self.cmap.items()
		codes.sort()
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("version", self.version),
				])
		writer.newline()

		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			writer.comment(Unicode[code])
			writer.newline()

		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, (name, attrs, content), ttFont):
		self.version = safeEval(attrs["version"])
		self.cmap = {}
		for element in content:
			if type(element) <> TupleType:
				continue
			name, attrs, content = element
			if name <> "map":
				continue
			self.cmap[safeEval(attrs["code"])] = attrs["name"]


class cmap_format_6(CmapSubtable):

	def decompile(self, data, ttFont):
		format, length, version, firstCode, entryCount = struct.unpack(
				">HHHHH", data[:10])
		self.version = int(version)
		firstCode = int(firstCode)
		self.version = int(version)
		data = data[10:]
		#assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
		glyphIndexArray = array.array("H")
		glyphIndexArray.fromstring(data[:2 * int(entryCount)])
		if ttLib.endian <> "big":
			glyphIndexArray.byteswap()
		self.cmap = cmap = {}
		for i in range(len(glyphIndexArray)):
			glyphID = glyphIndexArray[i]
			glyphName = ttFont.getGlyphName(glyphID)
			cmap[i+firstCode] = glyphName

	def compile(self, ttFont):
		codes = self.cmap.keys()
		codes.sort()
		assert codes == range(codes[0], codes[0] + len(codes))
		glyphIndexArray = array.array("H", [0] * len(codes))
		firstCode = codes[0]
		for i in range(len(codes)):
			code = codes[i]
			glyphIndexArray[code-firstCode] = ttFont.getGlyphID(self.cmap[code])
		if ttLib.endian <> "big":
			glyphIndexArray.byteswap()
		data = glyphIndexArray.tostring()
		header = struct.pack(">HHHHH",
				6, len(data) + 10, self.version, firstCode, len(self.cmap))
		return header + data

	def toXML(self, writer, ttFont):
		codes = self.cmap.items()
		codes.sort()
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("version", self.version),
				])
		writer.newline()

		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			writer.newline()

		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, (name, attrs, content), ttFont):
		self.version = safeEval(attrs["version"])
		self.cmap = {}
		for element in content:
			if type(element) <> TupleType:
				continue
			name, attrs, content = element
			if name <> "map":
				continue
			self.cmap[safeEval(attrs["code"])] = attrs["name"]


class cmap_format_unknown(CmapSubtable):
	"""Fallback for subtable formats without a dedicated class.

	The subtable is not interpreted: its data is stored and written back
	unchanged. No 'version' attribute is set.
	"""

	def decompile(self, data, ttFont):
		"""Keep the subtable data as is."""
		self.data = data

	def compile(self, ttFont):
		"""Return the data stored by decompile()."""
		return self.data


# Maps a subtable format number to the class that implements it. Formats
# missing from this dict are handled by cmap_format_unknown.
cmap_classes = {
		0: cmap_format_0,
		2: cmap_format_2,
		4: cmap_format_4,
		6: cmap_format_6,
		}