# _c_m_a_p.py revision 1a4f96b7871a0cf9b83e89c5f70854ddb0f41a5e
from __future__ import print_function, division
from fontTools.misc.py23 import *
from fontTools.misc.textTools import safeEval, readHex
from . import DefaultTable
import sys
import struct
import array
import operator
9f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
10f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
class table__c_m_a_p(DefaultTable.DefaultTable):
	"""The 'cmap' table: a collection of character code to glyph name subtables.

	After decompile() or fromXML() the instance carries:
		tableVersion -- the version number read from the table header.
		tables       -- a list of subtable objects, each tagged with a
		                platformID and a platEncID.
	"""

	def getcmap(self, platformID, platEncID):
		"""Return the first subtable with the given platform and encoding IDs.

		Returns None when no subtable matches.
		"""
		for subtable in self.tables:
			if (subtable.platformID == platformID and
					subtable.platEncID == platEncID):
				return subtable
		return None # not found

	def decompile(self, data, ttFont):
		"""Parse the binary table `data` into self.tableVersion and self.tables.

		Only the header of each subtable is decoded here; see the note in the
		loop body. Subtables whose header reports a zero length are reported
		with print() and skipped.
		"""
		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
		self.tableVersion = int(tableVersion)
		self.tables = tables = []
		# Maps a subtable offset to the index in `tables` of the first subtable
		# read from that offset, so that encoding records pointing at the same
		# data end up sharing a single cmap dict.
		seenOffsets = {}
		for i in range(numSubTables):
			# Encoding record: platformID, encodingID and an unsigned 32-bit offset.
			platformID, platEncID, offset = struct.unpack(
					">HHL", data[4+i*8:4+(i+1)*8])
			platformID, platEncID = int(platformID), int(platEncID)
			subtableFormat, length = struct.unpack(">HH", data[offset:offset+4])
			# These formats store their length as a 32-bit value, so the header
			# has to be read again with the matching layout.
			if subtableFormat in [8,10,12,13]:
				subtableFormat, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
			elif subtableFormat in [14]:
				subtableFormat, length = struct.unpack(">HL", data[offset:offset+6])

			if not length:
				print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s,  format %s offset %s. Skipping table." % (platformID, platEncID,subtableFormat, offset))
				continue
			if subtableFormat not in cmap_classes:
				table = cmap_format_unknown(subtableFormat)
			else:
				table = cmap_classes[subtableFormat](subtableFormat)
			table.platformID = platformID
			table.platEncID = platEncID
			# Note that by default we decompile only the subtable header info;
			# any other data gets decompiled only when an attribute of the
			# subtable is referenced.
			table.decompileHeader(data[offset:offset+int(length)], ttFont)
			if offset in seenOffsets:
				table.cmap = tables[seenOffsets[offset]].cmap
			else:
				# Record the position this table is about to take in `tables`.
				# The loop counter cannot be used: it runs ahead of the list
				# as soon as one zero-length subtable has been skipped.
				seenOffsets[offset] = len(tables)
			tables.append(table)

	def compile(self, ttFont):
		"""Return the binary form of the table.

		Sorts self.tables in place, then writes the header, one encoding
		record per subtable and the subtable data. Subtables that share a
		cmap dict, or that compile to identical bytes, are written once and
		referenced by several encoding records.
		"""
		self.tables.sort()    # sort according to the spec; see CmapSubtable.__lt__()
		numSubTables = len(self.tables)
		totalOffset = 4 + 8 * numSubTables
		records = [struct.pack(">HH", self.tableVersion, numSubTables)]
		chunks = []
		chunksLength = 0
		seen = {}  # Some tables are the same object reference. Don't compile them twice.
		done = {}  # Some tables are different objects, but compile to the same data chunk
		for table in self.tables:
			try:
				offset = seen[id(table.cmap)]
			except KeyError:
				chunk = table.compile(ttFont)
				if chunk in done:
					offset = done[chunk]
				else:
					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + chunksLength
					chunks.append(chunk)
					chunksLength += len(chunk)
			records.append(struct.pack(">HHL", table.platformID, table.platEncID, offset))
		# Join once at the end instead of growing two byte strings per subtable.
		return b"".join(records + chunks)

	def toXML(self, writer, ttFont):
		"""Write the table version followed by every subtable to `writer`."""
		writer.simpletag("tableVersion", version=self.tableVersion)
		writer.newline()
		for table in self.tables:
			table.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		"""Consume one child element of the table's XML dump.

		A "tableVersion" element sets self.tableVersion; an element named
		"cmap_format_<n>" is turned into a subtable and appended to
		self.tables. Any other element is ignored.
		"""
		if name == "tableVersion":
			self.tableVersion = safeEval(attrs["version"])
			return
		if name[:12] != "cmap_format_":
			return
		if not hasattr(self, "tables"):
			self.tables = []
		subtableFormat = safeEval(name[12:])
		if subtableFormat not in cmap_classes:
			table = cmap_format_unknown(subtableFormat)
		else:
			table = cmap_classes[subtableFormat](subtableFormat)
		table.platformID = safeEval(attrs["platformID"])
		table.platEncID = safeEval(attrs["platEncID"])
		table.fromXML(name, attrs, content, ttFont)
		self.tables.append(table)
98
99
class CmapSubtable(object):
	"""Base class for the cmap subtables.

	decompileHeader() decodes only the first three header fields and keeps
	the remaining bytes in self.data. The rest is decoded lazily: the first
	access to an attribute that is not set yet (typically `cmap`) calls the
	subclass' decompile() through __getattr__().
	"""

	def __init__(self, format):
		self.format = format
		self.data = None
		self.ttFont = None

	def __getattr__(self, attr):
		# allow lazy decompilation of subtables.
		if attr[:2] == '__': # don't handle requests for member functions like '__lt__'
			raise AttributeError(attr)
		# Look in the instance dict directly: reading self.data on an instance
		# that never went through __init__ would re-enter __getattr__ forever.
		if self.__dict__.get("data") is None:
			raise AttributeError(attr)
		self.decompile(None, None) # use saved data.
		self.data = None # Once this table has been decompiled, make sure we don't
						# just return the original data. Also avoids recursion when
						# called with an attribute that the cmap subtable doesn't have.
		return getattr(self, attr)

	def decompileHeader(self, data, ttFont):
		"""Decode the format, length and language fields found at the start of `data`.

		`data` must be exactly as long as the length field says; everything
		after the six header bytes is kept in self.data for a later
		decompile(), and `ttFont` is remembered in self.ttFont.
		"""
		format, length, language = struct.unpack(">HHH", data[:6])
		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = int(format)
		self.length = int(length)
		self.language = int(language)
		self.data = data[6:]
		self.ttFont = ttFont

	def toXML(self, writer, ttFont):
		"""Write the subtable as an element named after its class, one <map> per code."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("language", self.language),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def _writeCodes(self, codes, writer):
		"""Write one <map> tag per (code, name) pair.

		For platform 0 and for platform 3 with encoding 1 or 10, each tag is
		followed by a comment looked up in fontTools.unicode.Unicode.
		"""
		isUnicode = ((self.platformID, self.platEncID) in ((3, 1), (3, 10))
				or self.platformID == 0)
		if isUnicode:
			from fontTools.unicode import Unicode
		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			if isUnicode:
				writer.comment(Unicode[code])
			writer.newline()

	def _sortKey(self):
		"""Return the (platformID, platEncID, language) ordering key.

		Each value is paired with an "is set" flag so that a missing or None
		attribute orders before any number instead of raising TypeError when
		it is compared with one.
		"""
		key = []
		for attr in ("platformID", "platEncID", "language"):
			value = getattr(self, attr, None)
			if value is None:
				key.append((False, 0))
			else:
				key.append((True, value))
		return tuple(key)

	def __lt__(self, other):
		if not isinstance(other, CmapSubtable):
			return NotImplemented

		# implemented so that list.sort() sorts according to the spec.
		# Only the three IDs take part: comparing the instance dicts as a tie
		# breaker is not possible on Python 3 (dicts are not orderable), and
		# list.sort() is stable, so subtables with equal IDs keep their order.
		return self._sortKey() < other._sortKey()
168
169
class cmap_format_0(CmapSubtable):
	"""Format 0 subtable: one glyph ID byte for each of the 256 one-byte character codes."""

	def decompile(self, data, ttFont):
		"""Build self.cmap (character code -> glyph name) from the subtable data.

		Call with (None, None) to decode the data saved by decompileHeader(),
		or with the complete subtable bytes and the font to decode directly.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# `data` is the whole subtable, header included.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data # decompileHeader assigns the data after the header to self.data
		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
		# Passing the bytes to the constructor works on Python 2 and 3 alike;
		# array.fromstring() no longer exists on current Python 3.
		glyphIdArray = array.array("B", data)
		getGlyphName = self.ttFont.getGlyphName
		self.cmap = cmap = {}
		for charCode, glyphID in enumerate(glyphIdArray):
			cmap[charCode] = getGlyphName(glyphID)


	def compile(self, ttFont):
		"""Return the 262-byte binary subtable.

		Undecoded data saved by decompileHeader() is written back unchanged.
		Otherwise self.cmap must map exactly the codes 0..255.
		"""
		if self.data:
			return struct.pack(">HHH", 0, 262, self.language) + self.data

		charCodeList = sorted(self.cmap.items())
		charCodes = [entry[0] for entry in charCodeList]
		assert charCodes == list(range(256))
		valueList = [ttFont.getGlyphID(entry[1]) for entry in charCodeList]

		# The "B" array rejects glyph IDs that do not fit in one byte.
		glyphIdArray = array.array("B", valueList)
		# bytes(bytearray(...)) replaces array.tostring(), which no longer
		# exists on current Python 3, and also works on Python 2.
		data = struct.pack(">HHH", 0, 262, self.language) + bytes(bytearray(glyphIdArray))
		assert len(data) == 262
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every <map> child to self.cmap."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap
		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
217
218
# struct layout of one format 2 subheader, in the order it is packed and
# unpacked below: firstCode, entryCount, idDelta (signed), idRangeOffset.
subHeaderFormat = ">HHhH"
class SubHeader(object):
	"""Plain record holding the fields of one cmap format 2 subheader."""

	def __init__(self):
		self.firstCode = None
		self.entryCount = None
		self.idDelta = None
		self.idRangeOffset = None
		# Every instance gets its own list.
		self.glyphIndexArray = []
227
class cmap_format_2(CmapSubtable):
	"""Format 2 subtable: mixed one-byte / two-byte character codes mapped through subheaders."""

	def setIDDelta(self, subHeader):
		"""Pick subHeader.idDelta and rebase subHeader.glyphIndexArray on it, in place.

		Zero entries (notdef) are left untouched.
		"""
		subHeader.idDelta = 0
		# find the minGI which is not zero.
		minGI = subHeader.glyphIndexArray[0]
		for gid in subHeader.glyphIndexArray:
			if (gid != 0) and (gid < minGI):
				minGI = gid
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
		# We would like to pick an idDelta such that the first glyphArray GID is 1,
		# so that we are more likely to be able to combine glypharray GID subranges.
		# This means that we have a problem when minGI is > 32K
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid +  idDelta) % 0x10000),
		# we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
		# negative number to an unsigned short.

		if  (minGI > 1):
			if  minGI > 0x7FFF:
				subHeader.idDelta = -(0x10000 - minGI) -1
			else:
				subHeader.idDelta =  minGI -1
			idDelta = subHeader.idDelta
			for i in range(subHeader.entryCount):
				gid = subHeader.glyphIndexArray[i]
				if gid > 0:
					subHeader.glyphIndexArray[i] = gid - idDelta


	def decompile(self, data, ttFont):
		"""Build self.cmap (character code -> glyph name) from the subtable data.

		Call with (None, None) to decode the data saved by decompileHeader(),
		or with the complete subtable bytes and the font to decode directly.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# `data` is the whole subtable, header included.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		# get the key array, and determine the number of subHeaders.
		# (Passing the bytes to the constructor works on Python 2 and 3 alike;
		# array.fromstring() no longer exists on current Python 3.)
		allKeys = array.array("H", data[:512])
		data = data[512:]
		if sys.byteorder != "big":
			allKeys.byteswap()
		subHeaderKeys = [ key//8 for key in allKeys]
		maxSubHeaderindex = max(subHeaderKeys)

		#Load subHeaders
		subHeaderList = []
		pos = 0
		for i in range(maxSubHeaderindex + 1):
			subHeader = SubHeader()
			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
			pos += 8
			# idRangeOffset counts from its own position, which is the last
			# two bytes of the subheader that has just been consumed.
			giDataPos = pos + subHeader.idRangeOffset-2
			giList = array.array("H", data[giDataPos:giDataPos + subHeader.entryCount*2])
			if sys.byteorder != "big":
				giList.byteswap()
			subHeader.glyphIndexArray = giList
			subHeaderList.append(subHeader)
		# How this gets processed.
		# Charcodes may be one or two bytes.
		# The first byte of a charcode is mapped through the  subHeaderKeys, to select
		# a subHeader. For any subheader but 0, the next byte is then mapped through the
		# selected subheader. If subheader Index 0 is selected, then the byte itself is
		# mapped through the subheader, and there is no second byte.
		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
		#
		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
		# The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
		# referenced by another subheader.
		# The only subheader that will be referenced by more than one first-byte value is the subheader
		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
		# A subheader specifies a subrange within (0...256) by the
		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
		# (e.g. glyph not in font).
		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
		# Example for Logocut-Medium
		# first byte of charcode = 129; selects subheader 1.
		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
		# second byte of charCode = 66
		# the index offset = 66-64 = 2.
		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
		# [glyphIndexArray index], [subrange array index] = glyphIndex
		# [256], [0]=1 	from charcode [129, 64]
		# [257], [1]=2  	from charcode [129, 65]
		# [258], [2]=3  	from charcode [129, 66]
		# [259], [3]=4  	from charcode [129, 67]
		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
		# add it to the glyphID to get the final glyphIndex
		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

		self.data = b""
		self.cmap = cmap = {}
		for firstByte in range(256):
			subHeadindex = subHeaderKeys[firstByte]
			subHeader = subHeaderList[subHeadindex]
			if subHeadindex == 0:
				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
					continue # gi is notdef.
				else:
					charCode = firstByte
					offsetIndex = firstByte - subHeader.firstCode
					gi = subHeader.glyphIndexArray[offsetIndex]
					if gi != 0:
						gi = (gi + subHeader.idDelta) % 0x10000
					else:
						continue # gi is notdef.
				cmap[charCode] = gi
			else:
				if subHeader.entryCount:
					charCodeOffset = firstByte * 256 + subHeader.firstCode
					for offsetIndex in range(subHeader.entryCount):
						charCode = charCodeOffset + offsetIndex
						gi = subHeader.glyphIndexArray[offsetIndex]
						if gi != 0:
							gi = (gi + subHeader.idDelta) % 0x10000
						else:
							continue
						cmap[charCode] = gi
				# If not subHeader.entryCount, then all char codes with this first byte are
				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
				# same as mapping it to .notdef.
		# cmap values are GID's.
		glyphOrder = self.ttFont.getGlyphOrder()
		gids = list(cmap.values())
		charCodes = list(cmap.keys())
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# At least one GID is outside the glyph order; ask the font instead.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		for charCode, glyphName in zip(charCodes, names):
			cmap[charCode] = glyphName


	def compile(self, ttFont):
		"""Return the binary subtable built from self.cmap.

		Undecoded data saved by decompileHeader() is written back unchanged.
		Raises KeyError(name) for a glyph name that cannot be resolved to a
		glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		kEmptyTwoCharCodeRange = -1
		notdefGI = 0

		items = sorted(self.cmap.items())
		charCodes = [item[0] for item in items]
		names = [item[1] for item in items]
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 2 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								# int(), not eval(): the suffix comes from a glyph
								# name and must never be executed as code.
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							raise KeyError(name)

					gids.append(gid)

		# Process the (char code to gid) item list  in char code order.
		# By definition, all one byte char codes map to subheader 0.
		# For all the two byte char codes, we assume that the first byte maps to the empty subhead (with an entry count of 0,
		# which defines all char codes in its range to map to notdef) unless proven otherwise.
		# Note that since the char code items are processed in char code order, all the char codes with the
		# same first byte are in sequential order.

		subHeaderKeys = [kEmptyTwoCharCodeRange] * 256 # list of indices into subHeaderList.
		subHeaderList = []

		# We force this subheader entry 0  to exist in the subHeaderList in the case where some one comes up
		# with a cmap where all the one byte char codes map to notdef,
		# with the result that the subhead 0 would not get created just by processing the item list.
		charCode = charCodes[0]
		if charCode > 255:
			subHeader = SubHeader()
			subHeader.firstCode = 0
			subHeader.entryCount = 0
			subHeader.idDelta = 0
			subHeader.idRangeOffset = 0
			subHeaderList.append(subHeader)


		lastFirstByte = -1
		items = zip(charCodes, gids)
		for charCode, gid in items:
			if gid == 0:
				continue
			firstbyte = charCode >> 8
			secondByte = charCode & 0x00FF

			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
				if lastFirstByte > -1:
					# fix GI's and iDelta of current subheader.
					self.setIDDelta(subHeader)

					# If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
					# for the indices matching the char codes.
					if lastFirstByte == 0:
						for index in range(subHeader.entryCount):
							oneByteCode = subHeader.firstCode + index
							subHeaderKeys[oneByteCode] = 0

					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
				# init new subheader
				subHeader = SubHeader()
				subHeader.firstCode = secondByte
				subHeader.entryCount = 1
				subHeader.glyphIndexArray.append(gid)
				subHeaderList.append(subHeader)
				subHeaderKeys[firstbyte] = len(subHeaderList) -1
				lastFirstByte = firstbyte
			else:
				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
				for i in range(codeDiff):
					subHeader.glyphIndexArray.append(notdefGI)
				subHeader.glyphIndexArray.append(gid)
				subHeader.entryCount = subHeader.entryCount + codeDiff + 1

		# fix GI's and iDelta of last subheader that we added to the subheader array.
		self.setIDDelta(subHeader)

		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
		subHeader = SubHeader()
		subHeader.firstCode = 0
		subHeader.entryCount = 0
		subHeader.idDelta = 0
		subHeader.idRangeOffset = 2
		subHeaderList.append(subHeader)
		emptySubheadIndex = len(subHeaderList) - 1
		for index in range(256):
			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
				subHeaderKeys[index] = emptySubheadIndex
		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
		# charcode 0 and GID 0.

		idRangeOffset = (len(subHeaderList)-1)*8  + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
		for index in range(subheadRangeLen):
			subHeader = subHeaderList[index]
			subHeader.idRangeOffset = 0
			for j  in range(index):
				prevSubhead = subHeaderList[j]
				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
					subHeader.glyphIndexArray = []
					break
			if subHeader.idRangeOffset == 0: # didn't find one.
				subHeader.idRangeOffset = idRangeOffset
				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
			else:
				idRangeOffset = idRangeOffset - 8  # one less subheader

		# Now we can write out the data!
		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
		for subhead in 	subHeaderList[:-1]:
			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
		dataList = [struct.pack(">HHH", 2, length, self.language)]
		for index in subHeaderKeys:
			dataList.append(struct.pack(">H", index*8))
		for subhead in 	subHeaderList:
			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
		for subhead in 	subHeaderList[:-1]:
			for gi in subhead.glyphIndexArray:
				dataList.append(struct.pack(">H", gi))
		data = bytesjoin(dataList)
		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
		return data


	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every <map> child to self.cmap."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
536
537
# Seven big-endian unsigned shorts. This matches the fixed part of a format 4
# subtable as it is read in this file: format, length and language, then
# segCountX2, searchRange, entrySelector and rangeShift.
# NOTE(review): the code that uses this constant is not in this part of the
# file -- presumably the format 4 compile(); confirm.
cmap_format_4_format = ">7H"

# The fixed part is followed by these variable-length arrays:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
546
def splitRange(startCode, endCode, cmap):
	"""Split the code range startCode..endCode (inclusive) into cmap4 segments.

	`cmap` maps every character code of the range to a glyph ID.

	Returns a (start, end) pair of lists: `end` holds the last code of each
	resulting subrange, `start` the first code of every subrange but the
	first one (which always begins at startCode), so that
	len(start) + 1 == len(end). When splitting does not pay off the result
	is ([], [endCode]).
	"""
	# Try to split a range of character codes into subranges with consecutive
	# glyph IDs in such a way that the cmap4 subtable can be stored "most"
	# efficiently. I can't prove I've got the optimal solution, but it seems
	# to do well with the fonts I tested: none became bigger, many became smaller.
	if startCode == endCode:
		return [], [endCode]

	lastID = cmap[startCode]
	lastCode = startCode
	inOrder = None
	orderedBegin = None
	subRanges = []

	# Gather subranges in which the glyph IDs are consecutive.
	for code in range(startCode + 1, endCode + 1):
		glyphID = cmap[code]

		if glyphID - 1 == lastID:
			if inOrder is None or not inOrder:
				inOrder = 1
				orderedBegin = lastCode
		else:
			if inOrder:
				inOrder = 0
				subRanges.append((orderedBegin, lastCode))
				orderedBegin = None

		lastID = glyphID
		lastCode = code

	if inOrder:
		subRanges.append((orderedBegin, lastCode))
	assert lastCode == endCode

	# Now filter out those new subranges that would only make the data bigger.
	# A new segment cost 8 bytes, not using a new segment costs 2 bytes per
	# character.
	newRanges = []
	for b, e in subRanges:
		if b == startCode and e == endCode:
			break  # the whole range, we're fine
		if b == startCode or e == endCode:
			threshold = 4  # split costs one more segment
		else:
			threshold = 8  # split costs two more segments
		if (e - b + 1) > threshold:
			newRanges.append((b, e))
	subRanges = newRanges

	if not subRanges:
		return [], [endCode]

	# Make the list cover the whole range from startCode to endCode.
	if subRanges[0][0] != startCode:
		subRanges.insert(0, (startCode, subRanges[0][0] - 1))
	if subRanges[-1][1] != endCode:
		subRanges.append((subRanges[-1][1] + 1, endCode))

	# Fill the "holes" in the segments list -- those are the segments in which
	# the glyph IDs are _not_ consecutive.
	i = 1
	while i < len(subRanges):
		if subRanges[i-1][1] + 1 != subRanges[i][0]:
			subRanges.insert(i, (subRanges[i-1][1] + 1, subRanges[i][0] - 1))
			i = i + 1  # step over the range that was just inserted
		i = i + 1

	# Transform the ranges into startCode/endCode lists.
	start = []
	end = []
	for b, e in subRanges:
		start.append(b)
		end.append(e)
	start.pop(0)  # the first subrange starts at startCode, which the caller already has

	assert len(start) + 1 == len(end)
	return start, end
624
625
class cmap_format_4(CmapSubtable):
	"""cmap subtable format 4: segment mapping to delta values.

	Character codes are 16-bit.  The binary form stores parallel per-segment
	arrays (endCode, startCode, idDelta, idRangeOffset) followed by an
	optional glyphIndexArray.  Once decompiled, self.cmap maps character
	codes to glyph names.
	"""

	def decompile(self, data, ttFont):
		"""Parse the segment arrays and fill self.cmap (char code -> glyph name).

		We usually get here indirectly from the subtable __getattr__ function,
		in which case both args must be None and the bytes stored by
		decompileHeader() are used.  If not, someone is calling decompile()
		directly and must provide both the complete subtable data and ttFont.
		"""
		if data is not None and ttFont is not None:
			# This used to slice with the undefined names 'offset' and 'length'
			# (copied from the table-level loop), so a direct call raised
			# NameError.  The caller hands us exactly this subtable's bytes.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		(segCountX2, searchRange, entrySelector, rangeShift) = \
					struct.unpack(">4H", data[:8])
		data = data[8:]
		segCount = segCountX2 // 2

		allCodes = array.array("H")
		allCodes.fromstring(data)
		self.data = data = None

		# The file stores big-endian values; array uses native byte order.
		if sys.byteorder != "big":
			allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]
		lenGIArray = len(glyphIndexArray)

		# build 2-byte character mapping
		charCodes = []
		gids = []
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
			# extend() in place; rebuilding the list per segment was quadratic.
			charCodes.extend(rangeCharCodes)
			rangeOffset = idRangeOffset[i]
			for charCode in rangeCharCodes:
				if rangeOffset == 0:
					glyphID = charCode + idDelta[i]
				else:
					# idRangeOffset is a byte offset counted from the position of
					# idRangeOffset[i] itself; convert it to an index into
					# glyphIndexArray, which starts right after idRangeOffset.
					index = rangeOffset // 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array  is not less than the length of the array (%d) !" % (i, index, lenGIArray)
					if glyphIndexArray[index] != 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + idDelta[i]
					else:
						glyphID = 0  # missing glyph
				# idDelta arithmetic is modulo 65536
				gids.append(glyphID % 0x10000)

		self.cmap = cmap = {}
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# Some glyph ID is outside the glyph order; let the font name it.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		cmap.update(zip(charCodes, names))

	def setIDDelta(self, idDelta):
		"""Fold idDelta into the signed 16-bit range and return it.

		idDelta is stored as a short, so it must lie in -0x8000..0x7FFF, but
		(first glyph ID - startCode) can be anywhere in -(64K-1)..64K-1.
		Because the final glyph ID is reconstructed as
		    (code + idDelta) % 0x10000
		adding or subtracting 0x10000 yields an equivalent delta that fits.
		"""
		if idDelta > 0x7FFF:
			idDelta = idDelta - 0x10000
		elif idDelta < -0x8000:
			# The bound used to be -0x7FFF, which turned the valid value
			# -0x8000 into 0x8000 and overflowed the signed array in compile().
			idDelta = idDelta + 0x10000

		return idDelta

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If the subtable still holds undecompiled data, the stored header
		fields are re-packed in front of it.  Otherwise the segment arrays are
		rebuilt from self.cmap.  Raises KeyError(name) for a glyph name that
		cannot be resolved to a glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		from fontTools.ttLib.sfnt import maxPowerOfTwo

		charCodes = list(self.cmap.keys())
		cmap = {}  # code:glyphID mapping
		if not charCodes:
			startCode = [0xffff]
			endCode = [0xffff]
		else:
			charCodes.sort()
			names = [self.cmap[code] for code in charCodes]
			nameMap = ttFont.getReverseGlyphMap()
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				nameMap = ttFont.getReverseGlyphMap(rebuild=True)
				try:
					gids = [nameMap[name] for name in names]
				except KeyError:
					# allow virtual GIDs in format 4 tables
					gids = []
					for name in names:
						try:
							gid = nameMap[name]
						except KeyError:
							try:
								if (name[:3] == 'gid'):
									# int() instead of eval(): the name may come
									# from an untrusted font or XML dump.
									gid = int(name[3:])
								else:
									gid = ttFont.getGlyphID(name)
							except Exception:
								raise KeyError(name)

						gids.append(gid)
			cmap = dict(zip(charCodes, gids))

			# Build startCode and endCode lists.
			# Split the char codes in ranges of consecutive char codes, then split
			# each range in more ranges of consecutive/not consecutive glyph IDs.
			# See splitRange().
			lastCode = charCodes[0]
			endCode = []
			startCode = [lastCode]
			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
				if charCode == lastCode + 1:
					lastCode = charCode
					continue
				start, end = splitRange(startCode[-1], lastCode, cmap)
				startCode.extend(start)
				endCode.extend(end)
				startCode.append(charCode)
				lastCode = charCode
			endCode.append(lastCode)
			startCode.append(0xffff)
			endCode.append(0xffff)

		# Build idDelta, idRangeOffset and glyphIndexArray for every segment.
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []
		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = [cmap[charCode] for charCode in range(startCode[i], endCode[i] + 1)]
			if indices == list(range(indices[0], indices[0] + len(indices))):
				# Consecutive glyph IDs: a single delta describes the segment.
				idDelta.append(self.setIDDelta(indices[0] - startCode[i]))
				idRangeOffset.append(0)
			else:
				# Non-consecutive glyph IDs: store them in glyphIndexArray and
				# record the byte offset from idRangeOffset[i] to their start.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Binary-search helper fields stored in the header.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		maxExponent = maxPowerOfTwo(segCount)
		searchRange = 2 * (2 ** maxExponent)
		entrySelector = maxExponent
		rangeShift = 2 * segCount - searchRange

		# The [0] between endCode and startCode is the reservedPad field.
		charCodeArray = array.array("H", endCode + [0] + startCode)
		idDeltaArray = array.array("h", idDelta)
		restArray = array.array("H", idRangeOffset + glyphIndexArray)
		if sys.byteorder != "big":
			charCodeArray.byteswap()
			idDeltaArray.byteswap()
			restArray.byteswap()
		data = charCodeArray.tostring() + idDeltaArray.tostring() + restArray.tostring()

		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.language,
				segCountX2, searchRange, entrySelector, rangeShift)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and the <map> child elements into self.cmap.

		Any child element other than "map" triggers an assertion failure.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			nameMap, attrsMap, dummyContent = element
			if nameMap != "map":
				assert 0, "Unrecognized keyword in cmap subtable"
			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
828
829
class cmap_format_6(CmapSubtable):
	"""cmap subtable format 6: trimmed table mapping.

	Stores glyph IDs for one dense run of 16-bit character codes beginning
	at firstCode.  Once decompiled, self.cmap maps character codes to glyph
	names.
	"""

	def decompile(self, data, ttFont):
		"""Parse the glyph index array and fill self.cmap.

		We usually get here indirectly from the subtable __getattr__ function,
		in which case both args must be None.  If not, someone is calling
		decompile() directly and must provide both the complete subtable data
		and ttFont.
		"""
		if data is not None and ttFont is not None:
			# This used to slice with the undefined names 'offset' and 'length',
			# so a direct call raised NameError.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		firstCode, entryCount = struct.unpack(">HH", data[:4])
		firstCode = int(firstCode)
		data = data[4:]
		#assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
		glyphIndexArray = array.array("H")
		# Only read entryCount entries; trailing bytes are ignored (see above).
		glyphIndexArray.fromstring(data[:2 * int(entryCount)])
		if sys.byteorder != "big":
			glyphIndexArray.byteswap()
		self.data = data = None

		self.cmap = cmap = {}

		lenArray = len(glyphIndexArray)
		charCodes = list(range(firstCode, firstCode + lenArray))
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in glyphIndexArray]
		except IndexError:
			# Some glyph ID is outside the glyph order; let the font name it.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in glyphIndexArray]
		cmap.update(zip(charCodes, names))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		Codes between the lowest and highest mapped code that have no entry
		are written as ".notdef".  An empty cmap produces a header with zero
		entries.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		cmap = self.cmap
		# Sort first: dict key order is not guaranteed to be ascending, and the
		# first/last elements are used below as the lowest/highest code.
		codes = sorted(cmap.keys())
		if codes: # yes, there are empty cmap tables.
			codes = list(range(codes[0], codes[-1] + 1))
			firstCode = codes[0]
			valueList = [ttFont.getGlyphID(cmap.get(code, ".notdef")) for code in codes]
			glyphIndexArray = array.array("H", valueList)
			if sys.byteorder != "big":
				glyphIndexArray.byteswap()
			data = glyphIndexArray.tostring()
		else:
			data = b""
			firstCode = 0
		# 10 == size of the five uint16 header fields packed below
		header = struct.pack(">HHHHH",
				6, len(data) + 10, self.language, firstCode, len(codes))
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and the <map> child elements into self.cmap.

		Child elements other than "map" are ignored.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			elemName, elemAttrs, dummyContent = element
			if elemName != "map":
				continue
			cmap[safeEval(elemAttrs["code"])] = elemAttrs["name"]
897
898
class cmap_format_12_or_13(CmapSubtable):
	"""Shared implementation of cmap subtable formats 12 and 13.

	Both formats store groups of (startCharCode, endCharCode, glyphID) with
	32-bit fields.  Subclasses supply self._format_step, _computeGIDs() and
	_IsInSameRun() to define how glyph IDs progress within a group.
	"""

	def __init__(self, format):
		self.format = format
		self.reserved = 0
		self.data = None
		self.ttFont = None

	def decompileHeader(self, data, ttFont):
		"""Unpack the 16-byte header and keep the group records in self.data."""
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups
		self.data = data[16:]
		self.ttFont = ttFont

	def decompile(self, data, ttFont):
		"""Expand the group records and fill self.cmap (char code -> glyph name).

		We usually get here indirectly from the subtable __getattr__ function,
		in which case both args must be None.  If not, someone is calling
		decompile() directly and must provide both the complete subtable data
		and ttFont.
		"""
		if data is not None and ttFont is not None:
			# This used to slice with the undefined names 'offset' and 'length',
			# so a direct call raised NameError.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		charCodes = []
		gids = []
		pos = 0
		for i in range(self.nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] )
			pos += 12
			lenGroup = 1 + endCharCode - startCharCode
			charCodes.extend(range(startCharCode, endCharCode + 1))
			gids.extend(self._computeGIDs(glyphID, lenGroup))
		self.data = data = None
		self.cmap = cmap = {}
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# Some glyph ID is outside the glyph order; let the font name it.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		cmap.update(zip(charCodes, names))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If the subtable still holds undecompiled data, the stored header
		fields are re-packed in front of it.  Otherwise the groups are rebuilt
		from self.cmap.  Raises KeyError(name) for a glyph name that cannot be
		resolved to a glyph ID.
		"""
		if self.data:
			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
		charCodes = list(self.cmap.keys())
		if not charCodes:
			# An empty mapping is a bare 16-byte header with zero groups; the
			# run-building code below would fail on charCodes[0].
			return struct.pack(">HHLLL", self.format, self.reserved, 16, self.language, 0)
		# keys() and values() of an unmodified dict iterate in matching order
		names = list(self.cmap.values())
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 12 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								# int() instead of eval(): the name may come
								# from an untrusted font or XML dump.
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							raise KeyError(name)

					gids.append(gid)

		cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

		charCodes.sort()
		startCharCode = charCodes[0]
		startGlyphID = cmap[startCharCode]
		# Prime the "last" values so the first code continues the first run.
		lastGlyphID = startGlyphID - self._format_step
		lastCharCode = startCharCode - 1
		nGroups = 0
		dataList =  []
		for charCode in charCodes:
			glyphID = cmap[charCode]
			if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
				# Close the current group and start a new one at this code.
				dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
				startCharCode = charCode
				startGlyphID = glyphID
				nGroups = nGroups + 1
			lastGlyphID = glyphID
			lastCharCode = charCode
		dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
		nGroups = nGroups + 1
		data = bytesjoin(dataList)
		lengthSubtable = len(data) +16
		assert len(data) == (nGroups*12) == (lengthSubtable-16)
		return struct.pack(">HHLLL", self.format, self.reserved , lengthSubtable, self.language, nGroups) + data

	def toXML(self, writer, ttFont):
		"""Write the header fields as attributes and the mappings sorted by code."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the header attributes and the <map> child elements into self.cmap.

		Child elements other than "map" are ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			elemName, elemAttrs, dummyContent = element
			if elemName != "map":
				continue
			cmap[safeEval(elemAttrs["code"])] = elemAttrs["name"]
1039
1040
class cmap_format_12(cmap_format_12_or_13):
	"""Format 12 flavour: glyph IDs increase by one per character code in a group."""

	def __init__(self, format):
		cmap_format_12_or_13.__init__(self, format)
		# Glyph ID increment between two consecutive codes of the same group.
		self._format_step = 1

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return the list of consecutive glyph IDs covered by one group."""
		stop = startingGlyph + numberOfGlyphs
		return list(range(startingGlyph, stop))

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether both the glyph ID and the char code follow the previous pair by one."""
		if glyphID != lastGlyphID + 1:
			return False
		return charCode == lastCharCode + 1
1051
1052
class cmap_format_13(cmap_format_12_or_13):
	"""Format 13 flavour: every character code in a group maps to the same glyph ID."""

	def __init__(self, format):
		cmap_format_12_or_13.__init__(self, format)
		# Glyph ID increment between two consecutive codes of the same group.
		self._format_step = 0

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return the group's single glyph ID repeated once per character code."""
		return [startingGlyph for _ in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether the glyph ID is unchanged and the char code follows the previous one."""
		if glyphID != lastGlyphID:
			return False
		return charCode == lastCharCode + 1
1063
1064
def cvtToUVS(threeByteString):
	"""Decode a 3-byte big-endian byte string into an integer."""
	# Pad to four bytes so the value can be unpacked as an unsigned long.
	return struct.unpack(">L", b"\0" + threeByteString)[0]
1069
def cvtFromUVS(val):
	"""Encode an integer below 0x1000000 as a 3-byte big-endian byte string."""
	assert 0 <= val < 0x1000000
	# Pack as four bytes and drop the leading (always zero) byte.
	return struct.pack(">L", val)[1:]
1074
1075
class cmap_format_14(CmapSubtable):
	"""cmap subtable format 14: Unicode variation sequences.

	self.uvsDict maps a variation selector to a list of (base code point,
	glyph name) pairs; a glyph name of None marks a "default" entry, for
	which no glyph is recorded in this subtable.  self.cmap is kept as an
	empty dict for clients that expect every subtable to have one.
	"""

	def decompileHeader(self, data, ttFont):
		"""Unpack the 10-byte header and keep the remaining bytes in self.data."""
		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
		self.data = data[10:]
		self.length = length
		self.numVarSelectorRecords = numVarSelectorRecords
		self.ttFont = ttFont
		self.language = 0xFF # has no language.

	def decompile(self, data, ttFont):
		"""Parse the variation selector records into self.uvsDict.

		Pass both the complete subtable data and ttFont for a direct call, or
		None for both to parse the bytes saved by decompileHeader().
		"""
		if data is not None and ttFont is not None:
			# The extra 'ttFont.lazy' requirement was dropped: with a non-lazy
			# font a direct call fell through to the assertion below even
			# though both arguments had been supplied.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data

		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		uvsDict = {}
		recOffset = 0
		for n in range(self.numVarSelectorRecords):
			uvs, defOVSOffset, nonDefUVSOffset =  struct.unpack(">3sLL", data[recOffset:recOffset +11])
			recOffset += 11
			varUVS = cvtToUVS(uvs)
			if defOVSOffset:
				# Stored offsets include the 10-byte header that
				# decompileHeader() stripped from self.data.
				startOffset = defOVSOffset  - 10
				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset +=4
				for r in range(numValues):
					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
					startOffset += 4
					firstBaseUV = cvtToUVS(uv)
					cnt = addtlCnt+1
					# Default entries carry no glyph: record None as the name.
					localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV+cnt)]
					uvsDict.setdefault(varUVS, []).extend(localUVList)

			if nonDefUVSOffset:
				startOffset = nonDefUVSOffset  - 10
				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset +=4
				localUVList = []
				for r in range(numRecs):
					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
					startOffset += 5
					uv = cvtToUVS(uv)
					glyphName = self.ttFont.getGlyphName(gid)
					localUVList.append( [uv, glyphName] )
				uvsDict.setdefault(varUVS, []).extend(localUVList)

		self.uvsDict = uvsDict

	def toXML(self, writer, ttFont):
		"""Write one <map> element per (variation selector, base code point) pair."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("length", self.length),
				("numVarSelectorRecords", self.numVarSelectorRecords),
				])
		writer.newline()
		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		for uvs in uvsList:
			uvList = uvsDict[uvs]
			# Default entries (name is None) first, then by code point and name.
			uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
			for uv, gname in uvList:
				if gname is None:
					gname = "None"
				# I use the arg rather than th keyword syntax in order to preserve the attribute order.
				writer.simpletag("map", [ ("uvs",hex(uvs)), ("uv",hex(uv)), ("name", gname)]  )
				writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the header attributes and the <map> child elements into self.uvsDict.

		A name attribute of "None" is stored as None (a default entry).
		Child elements other than "map" are ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.length = safeEval(attrs["length"])
		self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
		self.language = 0xFF # provide a value so that  CmapSubtable.__lt__() won't fail
		if not hasattr(self, "cmap"):
			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		if not hasattr(self, "uvsDict"):
			self.uvsDict  = {}
		# Bind the local name unconditionally: it used to be assigned only
		# inside the branch above, so an existing uvsDict led to an
		# UnboundLocalError in the loop below.
		uvsDict = self.uvsDict

		for element in content:
			if not isinstance(element, tuple):
				continue
			elemName, elemAttrs, dummyContent = element
			if elemName != "map":
				continue
			uvs = safeEval(elemAttrs["uvs"])
			uv = safeEval(elemAttrs["uv"])
			gname = elemAttrs["name"]
			if gname == "None":
				gname = None
			uvsDict.setdefault(uvs, []).append( [uv, gname])

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If the subtable still holds undecompiled data, the stored header
		fields are re-packed in front of it.  Otherwise the records are
		rebuilt from self.uvsDict, and self.length and
		self.numVarSelectorRecords are updated to match.
		"""
		if self.data:
			return struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) + self.data

		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		self.numVarSelectorRecords = len(uvsList)
		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
		data = []
		varSelectorRecords =[]
		for uvs in uvsList:
			entryList = uvsDict[uvs]

			defList = sorted(entry[0] for entry in entryList if entry[1] is None)
			if defList:
				defOVSOffset = offset

				# Collapse consecutive code points into (start, additionalCount)
				# ranges.  additionalCount is a single byte, so a run is also
				# cut once it would exceed 255; longer runs used to make
				# struct.pack() fail.
				lastUV = defList[0]
				cnt = -1
				defRecs = []
				for defEntry in defList:
					cnt +=1
					if (lastUV+cnt) != defEntry or cnt > 0xFF:
						rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1)
						lastUV = defEntry
						defRecs.append(rec)
						cnt = 0

				rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
				defRecs.append(rec)

				numDefRecs = len(defRecs)
				data.append(struct.pack(">L", numDefRecs))
				data.extend(defRecs)
				offset += 4 + numDefRecs*4
			else:
				defOVSOffset = 0

			ndefList = sorted(entry for entry in entryList if entry[1] is not None)
			if ndefList:
				nonDefUVSOffset = offset
				numNonDefRecs = len(ndefList)
				data.append(struct.pack(">L", numNonDefRecs))
				offset += 4 + numNonDefRecs*5

				for uv, gname in ndefList:
					gid = ttFont.getGlyphID(gname)
					ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
					data.append(ndrec)
			else:
				nonDefUVSOffset = 0

			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
			varSelectorRecords.append(vrec)

		data = bytesjoin(varSelectorRecords) + bytesjoin(data)
		self.length = 10 + len(data)
		headerdata = struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords)
		# Do not store the result in self.data: that attribute holds header-less
		# bytes, so keeping header+body there made the next compile() call
		# prepend a second header.
		return headerdata + data
1250
1251
class cmap_format_unknown(CmapSubtable):
	"""Subtable of a format with no dedicated class; its bytes are kept verbatim.

	The raw data is stored in self.data and is dumped to / read from XML as
	hex.
	"""

	def toXML(self, writer, ttFont):
		"""Write the raw subtable data as a hex dump."""
		# "cmap_format_" (the first 12 characters of the class name) + format number
		cmapName = self.__class__.__name__[:12] + str(self.format)
		writer.begintag(cmapName, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		writer.dumphex(self.data)
		writer.endtag(cmapName)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the hex dump back into self.data; self.cmap is left empty."""
		self.data = readHex(content)
		self.cmap = {}

	def decompileHeader(self, data, ttFont):
		"""Store the complete subtable bytes; no header fields are parsed."""
		self.language = 0  # dummy value
		self.data = data

	def decompile(self, data, ttFont):
		"""Store the raw data when called directly; nothing else is decoded.

		We usually get here indirectly from the subtable __getattr__ function,
		in which case both args must be None.  If not, someone is calling
		decompile() directly and must provide both args.
		"""
		if data is not None and ttFont is not None:
			# This used to slice with the undefined names 'offset' and 'length',
			# so a direct call raised NameError.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

	def compile(self, ttFont):
		"""Return the stored raw bytes, or None when there are none."""
		if self.data:
			return self.data
		return None
1286
# Maps a cmap subtable format number to the class that implements it.
# NOTE(review): cmap_format_unknown is not listed; presumably callers fall
# back to it for any format missing from this table -- confirm at the call site.
cmap_classes = {
		0: cmap_format_0,
		2: cmap_format_2,
		4: cmap_format_4,
		6: cmap_format_6,
		12: cmap_format_12,
		13: cmap_format_13,
		14: cmap_format_14,
		}
1296