_c_m_a_p.py revision 0d182bfb8078665313280db759b782c3144f65fa
1from __future__ import print_function, division, absolute_import
2from fontTools.misc.py23 import *
3from fontTools.misc.textTools import safeEval, readHex
4from fontTools.unicode import Unicode
5from . import DefaultTable
6import sys
7import struct
8import array
9import operator
10
11
class table__c_m_a_p(DefaultTable.DefaultTable):
	"""The 'cmap' table: a list of character code to glyph name subtables.

	``self.tableVersion`` holds the version number read from the table
	header and ``self.tables`` holds one subtable object per usable
	encoding record.
	"""

	def getcmap(self, platformID, platEncID):
		"""Return the first subtable with the given platform and encoding IDs.

		Returns None when no subtable matches.
		"""
		for subtable in self.tables:
			if (subtable.platformID == platformID and
					subtable.platEncID == platEncID):
				return subtable
		return None # not found

	def decompile(self, data, ttFont):
		"""Parse the binary table *data* into ``self.tables``.

		Only the header of each subtable is decoded here; the subtable
		keeps its remaining bytes and decodes them the first time one of
		its attributes is referenced. Encoding records with a zero length
		are reported on stdout and skipped.
		"""
		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
		self.tableVersion = int(tableVersion)
		self.tables = tables = []
		# Maps a subtable offset to the index *in `tables`* of the first
		# subtable read from that offset, so that encoding records pointing
		# at the same bytes share a single cmap dict.
		seenOffsets = {}
		for i in range(numSubTables):
			platformID, platEncID, offset = struct.unpack(
					">HHl", data[4+i*8:4+(i+1)*8])
			platformID, platEncID = int(platformID), int(platEncID)
			subtableFormat, length = struct.unpack(">HH", data[offset:offset+4])
			if subtableFormat in [8,10,12,13]:
				# These formats carry a reserved uint16 and a 32-bit length.
				subtableFormat, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
			elif subtableFormat in [14]:
				# Format 14 has a 32-bit length directly after the format.
				subtableFormat, length = struct.unpack(">HL", data[offset:offset+6])

			if not length:
				print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s,  format %s offset %s. Skipping table." % (platformID, platEncID,subtableFormat, offset))
				continue
			if subtableFormat not in cmap_classes:
				table = cmap_format_unknown(subtableFormat)
			else:
				table = cmap_classes[subtableFormat](subtableFormat)
			table.platformID = platformID
			table.platEncID = platEncID
			# Note that by default we decompile only the subtable header info;
			# any other data gets decompiled only when an attribute of the
			# subtable is referenced.
			table.decompileHeader(data[offset:offset+int(length)], ttFont)
			if offset in seenOffsets:
				table.cmap = tables[seenOffsets[offset]].cmap
			else:
				# Record the position in `tables`, not the record index `i`:
				# the two differ as soon as a zero-length subtable has been
				# skipped above, and `i` would then point at the wrong entry
				# (or past the end of the list).
				seenOffsets[offset] = len(tables)
			tables.append(table)

	def compile(self, ttFont):
		"""Return the binary table built from ``self.tables``.

		Subtables are sorted first (see CmapSubtable.__lt__()). Subtables
		that share a cmap object, or that compile to identical bytes, are
		written once and referenced by several encoding records.
		"""
		self.tables.sort()    # sort according to the spec; see CmapSubtable.__lt__()
		numSubTables = len(self.tables)
		totalOffset = 4 + 8 * numSubTables  # table header plus one 8-byte record per subtable
		data = struct.pack(">HH", self.tableVersion, numSubTables)
		tableData = b""
		seen = {}  # Some tables are the same object reference. Don't compile them twice.
		done = {}  # Some tables are different objects, but compile to the same data chunk
		for table in self.tables:
			try:
				offset = seen[id(table.cmap)]
			except KeyError:
				chunk = table.compile(ttFont)
				if chunk in done:
					offset = done[chunk]
				else:
					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
					tableData = tableData + chunk
			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
		return data + tableData

	def toXML(self, writer, ttFont):
		"""Write the table version followed by every subtable to *writer*."""
		writer.simpletag("tableVersion", version=self.tableVersion)
		writer.newline()
		for table in self.tables:
			table.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		"""Handle one child element of the table's XML representation.

		A "tableVersion" element sets ``self.tableVersion``; an element
		named "cmap_format_<N>" is turned into a subtable and appended to
		``self.tables``. Any other element is ignored.
		"""
		if name == "tableVersion":
			self.tableVersion = safeEval(attrs["version"])
			return
		if name[:12] != "cmap_format_":
			return
		if not hasattr(self, "tables"):
			self.tables = []
		subtableFormat = safeEval(name[12:])
		if subtableFormat not in cmap_classes:
			table = cmap_format_unknown(subtableFormat)
		else:
			table = cmap_classes[subtableFormat](subtableFormat)
		table.platformID = safeEval(attrs["platformID"])
		table.platEncID = safeEval(attrs["platEncID"])
		table.fromXML(name, attrs, content, ttFont)
		self.tables.append(table)
99
100
class CmapSubtable(object):
	"""Base class for cmap subtables with lazy decompilation.

	After decompileHeader() the instance only holds the header fields and
	the raw bytes that follow them (``self.data``). The first access to an
	attribute that does not exist yet calls ``self.decompile(None, None)``,
	which the concrete subtable classes provide.
	"""

	def __init__(self, format):
		self.format = format
		self.data = None
		self.ttFont = None

	def __getattr__(self, attr):
		# allow lazy decompilation of subtables.
		if attr[:2] == '__': # don't handle requests for member functions like '__lt__'
			raise AttributeError(attr)
		if self.data is None:
			raise AttributeError(attr)
		self.decompile(None, None) # use saved data.
		self.data = None # Once this table has been decompiled, make sure we don't
						# just return the original data. Also avoids recursion when
						# called with an attribute that the cmap subtable doesn't have.
		return getattr(self, attr)

	def decompileHeader(self, data, ttFont):
		"""Read format, length and language from the start of *data*.

		*data* must be exactly the bytes of this subtable (header
		included). The bytes after the 6-byte header and *ttFont* are kept
		for the later, lazy decompile().
		"""
		format, length, language = struct.unpack(">HHH", data[:6])
		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = int(format)
		self.length = int(length)
		self.language = int(language)
		self.data = data[6:]
		self.ttFont = ttFont

	def toXML(self, writer, ttFont):
		"""Write this subtable as an element named after its class."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("language", self.language),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def isUnicode(self):
		"""Return True for platform 0, or platform 3 with encoding 1 or 10."""
		return (self.platformID == 0 or
			(self.platformID == 3 and self.platEncID in [1, 10]))

	def isSymbol(self):
		"""Return True for platform 3 with encoding 0."""
		return self.platformID == 3 and self.platEncID == 0

	def _writeCodes(self, codes, writer):
		"""Write one "map" element per (code, name) pair in *codes*."""
		isUnicode = self.isUnicode()
		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			if isUnicode:
				writer.comment(Unicode[code])
			writer.newline()

	def _sortKey(self):
		"""Return (platformID, platEncID, language) for ordering.

		A missing attribute is represented by -1 so that the tuple stays
		comparable on Python 3 and still sorts before every real ID.
		"""
		key = []
		for attr in ("platformID", "platEncID", "language"):
			value = getattr(self, attr, None)
			key.append(-1 if value is None else value)
		return tuple(key)

	def __lt__(self, other):
		"""Order subtables by platformID, platEncID and language.

		Implemented so that list.sort() sorts according to the spec.
		Subtables whose three keys are equal compare as "not less than"
		each other, so a (stable) sort keeps them in their current order.
		The instance __dict__ is deliberately not used as a tie-breaker:
		dicts cannot be ordered on Python 3 and would raise TypeError.
		"""
		if not isinstance(other, CmapSubtable):
			return NotImplemented
		return self._sortKey() < other._sortKey()
172
173
class cmap_format_0(CmapSubtable):
	"""Format 0 subtable: a byte array of 256 glyph IDs indexed by char code."""

	def decompile(self, data, ttFont):
		"""Build ``self.cmap`` (char code -> glyph name).

		We usually get here indirectly from the subtable __getattr__
		function, in which case both args must be None and the bytes saved
		by decompileHeader() are used. If not, someone is calling the
		subtable decompile() directly, and must provide both args; *data*
		is then the complete subtable, header included.
		"""
		if data is not None and ttFont is not None:
			# The caller hands us the whole subtable, so pass it on as is.
			# (The previous code sliced it with `offset` and `length`, names
			# that do not exist in this scope.)
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data # decompileHeader assigns the data after the header to self.data
		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
		# Passing the bytes to the constructor works on Python 2 and 3;
		# array.fromstring() no longer exists on Python 3.9+.
		glyphIdArray = array.array("B", data)
		getGlyphName = self.ttFont.getGlyphName
		self.cmap = cmap = {}
		for charCode, glyphID in enumerate(glyphIdArray):
			cmap[charCode] = getGlyphName(glyphID)

	def compile(self, ttFont):
		"""Return the 262-byte binary subtable.

		If the raw bytes are still around (the subtable was never
		decompiled) they are written back unchanged. Otherwise
		``self.cmap`` must map exactly the codes 0..255. A glyph ID that
		does not fit in one byte makes struct.pack raise struct.error.
		"""
		if self.data:
			return struct.pack(">HHH", 0, 262, self.language) + self.data

		charCodeList = sorted(self.cmap.items())
		charCodes = [entry[0] for entry in charCodeList]
		assert charCodes == list(range(256))
		glyphIDs = [ttFont.getGlyphID(entry[1]) for entry in charCodeList]

		# Header and the 256 one-byte glyph IDs in a single pack call;
		# array.tostring() no longer exists on Python 3.9+.
		data = struct.pack(">HHH256B", 0, 262, self.language, *glyphIDs)
		assert len(data) == 262
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language and the "map" child elements into ``self.cmap``."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap
		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, childContent = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
221
222
# struct format of one format 2 subheader:
# firstCode, entryCount, idDelta (signed), idRangeOffset.
subHeaderFormat = ">HHhH"
class SubHeader(object):
	"""Plain record for one format 2 subheader and its glyph index range."""

	def __init__(self):
		# The four header fields stay None until somebody fills them in.
		for fieldName in ("firstCode", "entryCount", "idDelta", "idRangeOffset"):
			setattr(self, fieldName, None)
		# Every instance gets its own, initially empty, glyph index list.
		self.glyphIndexArray = list()
231
class cmap_format_2(CmapSubtable):
	"""Format 2 subtable: mixed one-byte / two-byte character codes."""

	def setIDDelta(self, subHeader):
		"""Choose ``subHeader.idDelta`` and rebase its glyph index array.

		The glyph indices are rewritten in place so that the smallest
		non-zero one becomes 1; zero entries (notdef) are left alone.
		"""
		subHeader.idDelta = 0
		# find the minGI which is not zero.
		minGI = subHeader.glyphIndexArray[0]
		for gid in subHeader.glyphIndexArray:
			if (gid != 0) and (gid < minGI):
				minGI = gid
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
		# We would like to pick an idDelta such that the first glyphArray GID is 1,
		# so that we are more likely to be able to combine glypharray GID subranges.
		# This means that we have a problem when minGI - 1 does not fit in a short.
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid +  idDelta) % 0x10000),
		# we can use the negative idDelta that is congruent to minGI - 1
		# modulo 0x10000 instead, and store the rebased glyph indices
		# modulo 0x10000 as well.

		if  (minGI > 1):
			if  minGI > 0x8000:
				# minGI - 1 would exceed 0x7FFF; wrap into the negative range.
				# (The test used to be `> 0x7FFF`, which produced -32769 for
				# minGI == 0x8000, outside the range of a short.)
				subHeader.idDelta = minGI - 1 - 0x10000
			else:
				subHeader.idDelta =  minGI -1
			idDelta = subHeader.idDelta
			for i in range(subHeader.entryCount):
				gid = subHeader.glyphIndexArray[i]
				if gid > 0:
					# The modulo keeps the stored value in 1..0xFFFF when
					# idDelta is negative; it is a no-op otherwise.
					subHeader.glyphIndexArray[i] = (gid - idDelta) % 0x10000


	def decompile(self, data, ttFont):
		"""Build ``self.cmap`` (char code -> glyph name).

		We usually get here indirectly from the subtable __getattr__
		function, in which case both args must be None and the bytes saved
		by decompileHeader() are used. If not, someone is calling the
		subtable decompile() directly, and must provide both args; *data*
		is then the complete subtable, header included.
		"""
		if data is not None and ttFont is not None:
			# The caller hands us the whole subtable, so pass it on as is.
			# (The previous code sliced it with `offset` and `length`, names
			# that do not exist in this scope.)
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		# get the key array, and determine the number of subHeaders.
		# Passing the bytes to the constructor works on Python 2 and 3;
		# array.fromstring() no longer exists on Python 3.9+.
		allKeys = array.array("H", data[:512])
		data = data[512:]
		if sys.byteorder != "big":
			allKeys.byteswap()
		subHeaderKeys = [ key//8 for key in allKeys]
		maxSubHeaderindex = max(subHeaderKeys)

		#Load subHeaders
		subHeaderList = []
		pos = 0
		for i in range(maxSubHeaderindex + 1):
			subHeader = SubHeader()
			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
			pos += 8
			# idRangeOffset counts from the idRangeOffset word itself, which
			# is the last two bytes of the subheader just read.
			giDataPos = pos + subHeader.idRangeOffset-2
			giList = array.array("H", data[giDataPos:giDataPos + subHeader.entryCount*2])
			if sys.byteorder != "big":
				giList.byteswap()
			subHeader.glyphIndexArray = giList
			subHeaderList.append(subHeader)
		# How this gets processed.
		# Charcodes may be one or two bytes.
		# The first byte of a charcode is mapped through the  subHeaderKeys, to select
		# a subHeader. For any subheader but 0, the next byte is then mapped through the
		# selected subheader. If subheader Index 0 is selected, then the byte itself is
		# mapped through the subheader, and there is no second byte.
		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
		#
		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
		# The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
		# referenced by another subheader.
		# The only subheader that will be referenced by more than one first-byte value is the subheader
		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
		# A subheader specifies a subrange within (0...256) by the
		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
		# (e.g. glyph not in font).
		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
		# Example for Logocut-Medium
		# first byte of charcode = 129; selects subheader 1.
		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
		# second byte of charCode = 66
		# the index offset = 66-64 = 2.
		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
		# [glyphIndexArray index], [subrange array index] = glyphIndex
		# [256], [0]=1 	from charcode [129, 64]
		# [257], [1]=2  	from charcode [129, 65]
		# [258], [2]=3  	from charcode [129, 66]
		# [259], [3]=4  	from charcode [129, 67]
		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
		# add it to the glyphID to get the final glyphIndex
		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

		self.data = b""
		self.cmap = cmap = {}
		for firstByte in range(256):
			subHeadindex = subHeaderKeys[firstByte]
			subHeader = subHeaderList[subHeadindex]
			if subHeadindex == 0:
				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
					continue # gi is notdef.
				else:
					charCode = firstByte
					offsetIndex = firstByte - subHeader.firstCode
					gi = subHeader.glyphIndexArray[offsetIndex]
					if gi != 0:
						gi = (gi + subHeader.idDelta) % 0x10000
					else:
						continue # gi is notdef.
				cmap[charCode] = gi
			else:
				if subHeader.entryCount:
					charCodeOffset = firstByte * 256 + subHeader.firstCode
					for offsetIndex in range(subHeader.entryCount):
						charCode = charCodeOffset + offsetIndex
						gi = subHeader.glyphIndexArray[offsetIndex]
						if gi != 0:
							gi = (gi + subHeader.idDelta) % 0x10000
						else:
							continue
						cmap[charCode] = gi
				# If not subHeader.entryCount, then all char codes with this first byte are
				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
				# same as mapping it to .notdef.
		# cmap values are GID's; replace them by glyph names.
		glyphOrder = self.ttFont.getGlyphOrder()
		gids = list(cmap.values())
		charCodes = list(cmap.keys())
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# At least one gid is not in the glyph order; let the font
			# come up with the names instead.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		for charCode, name in zip(charCodes, names):
			cmap[charCode] = name


	def compile(self, ttFont):
		"""Return the binary subtable built from ``self.cmap``.

		If the raw bytes are still around (the subtable was never
		decompiled) they are written back unchanged. Raises KeyError with
		the glyph name when a name cannot be resolved to a glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		kEmptyTwoCharCodeRange = -1
		notdefGI = 0

		items = sorted(self.cmap.items())
		charCodes = [item[0] for item in items]
		names = [item[1] for item in items]
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 2 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								# NOTE(review): eval() runs on the tail of a glyph
								# name, and names can come from XML input (see
								# fromXML). Consider int() if only decimal
								# numbers need to be supported.
								gid = eval(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							raise KeyError(name)

					gids.append(gid)

		# Process the (char code to gid) item list  in char code order.
		# By definition, all one byte char codes map to subheader 0.
		# For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0,
		# which defines all char codes in its range to map to notdef) unless proven otherwise.
		# Note that since the char code items are processed in char code order, all the char codes with the
		# same first byte are in sequential order.

		subHeaderKeys = [ kEmptyTwoCharCodeRange for x in  range(256)] # list of indices into subHeaderList.
		subHeaderList = []

		# We force this subheader entry 0  to exist in the subHeaderList in the case where some one comes up
		# with a cmap where all the one byte char codes map to notdef,
		# with the result that the subhead 0 would not get created just by processing the item list.
		charCode = charCodes[0]
		if charCode > 255:
			subHeader = SubHeader()
			subHeader.firstCode = 0
			subHeader.entryCount = 0
			subHeader.idDelta = 0
			subHeader.idRangeOffset = 0
			subHeaderList.append(subHeader)


		lastFirstByte = -1
		items = zip(charCodes, gids)
		for charCode, gid in items:
			if gid == 0:
				continue
			firstbyte = charCode >> 8
			secondByte = charCode & 0x00FF

			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
				if lastFirstByte > -1:
					# fix GI's and iDelta of current subheader.
					self.setIDDelta(subHeader)

					# If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
					# for the indices matching the char codes.
					if lastFirstByte == 0:
						for index in range(subHeader.entryCount):
							oneByteCode = subHeader.firstCode + index
							subHeaderKeys[oneByteCode] = 0

					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
				# init new subheader
				subHeader = SubHeader()
				subHeader.firstCode = secondByte
				subHeader.entryCount = 1
				subHeader.glyphIndexArray.append(gid)
				subHeaderList.append(subHeader)
				subHeaderKeys[firstbyte] = len(subHeaderList) -1
				lastFirstByte = firstbyte
			else:
				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
				for i in range(codeDiff):
					subHeader.glyphIndexArray.append(notdefGI)
				subHeader.glyphIndexArray.append(gid)
				subHeader.entryCount = subHeader.entryCount + codeDiff + 1

		# fix GI's and iDelta of last subheader that we we added to the subheader array.
		self.setIDDelta(subHeader)

		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
		subHeader = SubHeader()
		subHeader.firstCode = 0
		subHeader.entryCount = 0
		subHeader.idDelta = 0
		subHeader.idRangeOffset = 2
		subHeaderList.append(subHeader)
		emptySubheadIndex = len(subHeaderList) - 1
		for index in range(256):
			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
				subHeaderKeys[index] = emptySubheadIndex
		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
		# charcode 0 and GID 0.

		idRangeOffset = (len(subHeaderList)-1)*8  + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
		for index in range(subheadRangeLen):
			subHeader = subHeaderList[index]
			subHeader.idRangeOffset = 0
			for j  in range(index):
				prevSubhead = subHeaderList[j]
				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
					subHeader.glyphIndexArray = []
					break
			if subHeader.idRangeOffset == 0: # didn't find one.
				subHeader.idRangeOffset = idRangeOffset
				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
			else:
				idRangeOffset = idRangeOffset - 8  # one less subheader

		# Now we can write out the data!
		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
		for subhead in 	subHeaderList[:-1]:
			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
		dataList = [struct.pack(">HHH", 2, length, self.language)]
		for index in subHeaderKeys:
			dataList.append(struct.pack(">H", index*8))
		for subhead in 	subHeaderList:
			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
		for subhead in 	subHeaderList[:-1]:
			for gi in subhead.glyphIndexArray:
				dataList.append(struct.pack(">H", gi))
		data = bytesjoin(dataList)
		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
		return data


	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language and the "map" child elements into ``self.cmap``."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, childContent = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
540
541
# struct format of the fixed-size start of a format 4 subtable. It packs
# seven big-endian uint16 values: format, length, language, segCountX2,
# searchRange, entrySelector and rangeShift (see cmap_format_4.compile()).
cmap_format_4_format = ">7H"

# The fixed-size part is followed by these variable-length arrays:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
550
def splitRange(startCode, endCode, cmap):
	"""Split a run of consecutive char codes into cheaper cmap4 segments.

	*cmap* maps every code in startCode..endCode to a glyph ID. The range
	is cut into subranges of consecutive / non-consecutive glyph IDs
	wherever that is expected to make the format 4 subtable smaller.

	Returns a ``(start, end)`` pair of lists. ``end`` holds the end code of
	every resulting segment; ``start`` holds the start code of every
	segment but the first (whose start is *startCode* itself), so
	``len(start) + 1 == len(end)``.
	"""
	if startCode == endCode:
		return [], [endCode]

	# Step 1: collect the maximal runs in which the glyph IDs go up by one.
	runs = []
	runStart = None  # first code of the run currently being tracked
	prevID = cmap[startCode]
	prevCode = startCode
	for code in range(startCode + 1, endCode + 1):
		glyphID = cmap[code]
		if glyphID - 1 == prevID:
			if runStart is None:
				runStart = prevCode
		elif runStart is not None:
			runs.append((runStart, prevCode))
			runStart = None
		prevID = glyphID
		prevCode = code
	if runStart is not None:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Step 2: drop the runs that would only make the data bigger. A new
	# segment costs 8 bytes, not using one costs 2 bytes per character.
	worthwhile = []
	for first, last in runs:
		if first == startCode and last == endCode:
			break  # the whole range is one run, nothing to split
		# A run touching an edge adds one segment, an inner run adds two.
		touchesEdge = first == startCode or last == endCode
		threshold = 4 if touchesEdge else 8
		if last - first + 1 > threshold:
			worthwhile.append((first, last))

	if not worthwhile:
		return [], [endCode]

	# Step 3: make the list reach from startCode to endCode.
	if worthwhile[0][0] != startCode:
		worthwhile.insert(0, (startCode, worthwhile[0][0] - 1))
	if worthwhile[-1][1] != endCode:
		worthwhile.append((worthwhile[-1][1] + 1, endCode))

	# Step 4: fill the "holes" between neighbours -- those are the segments
	# in which the glyph IDs are _not_ consecutive.
	segments = [worthwhile[0]]
	for first, last in worthwhile[1:]:
		gapStart = segments[-1][1] + 1
		if gapStart != first:
			segments.append((gapStart, first - 1))
		segments.append((first, last))

	# Step 5: turn the segments into the two result lists. The first start
	# code is left out because the caller already knows it.
	start = [first for first, last in segments[1:]]
	end = [last for first, last in segments]

	assert len(start) + 1 == len(end)
	return start, end
628
629
class cmap_format_4(CmapSubtable):
	"""Format 4 subtable: segment mapping to delta values (16-bit codes)."""

	def decompile(self, data, ttFont):
		"""Build ``self.cmap`` (char code -> glyph name).

		We usually get here indirectly from the subtable __getattr__
		function, in which case both args must be None and the bytes saved
		by decompileHeader() are used. If not, someone is calling the
		subtable decompile() directly, and must provide both args; *data*
		is then the complete subtable, header included.
		"""
		if data is not None and ttFont is not None:
			# The caller hands us the whole subtable, so pass it on as is.
			# (The previous code sliced self.data with `offset` and
			# `length`, names that do not exist in this scope.)
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		(segCountX2, searchRange, entrySelector, rangeShift) = \
					struct.unpack(">4H", data[:8])
		data = data[8:]
		segCount = segCountX2 // 2

		# Passing the bytes to the constructor works on Python 2 and 3;
		# array.fromstring() no longer exists on Python 3.9+.
		allCodes = array.array("H", data)
		self.data = data = None

		if sys.byteorder != "big":
			allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]
		lenGIArray = len(glyphIndexArray)

		# build 2-byte character mapping
		charCodes = []
		gids = []
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
			charCodes.extend(rangeCharCodes)
			for charCode in rangeCharCodes:
				rangeOffset = idRangeOffset[i]
				if rangeOffset == 0:
					glyphID = charCode + idDelta[i]
				else:
					# rangeOffset is a byte offset counted from position i of
					# the idRangeOffset array; turn it into an index into
					# glyphIndexArray, which follows that array.
					index = rangeOffset // 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array  is not less than the length of the array (%d) !" % (i, index, lenGIArray)
					if glyphIndexArray[index] != 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + idDelta[i]
					else:
						glyphID = 0  # missing glyph
				gids.append(glyphID % 0x10000)

		self.cmap = cmap = {}
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# At least one gid is not in the glyph order; let the font
			# come up with the names instead.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		for charCode, name in zip(charCodes, names):
			cmap[charCode] = name



	def setIDDelta(self, idDelta):
		"""Return *idDelta* wrapped into the range of a signed 16-bit short.

		*idDelta* is (first glyph ID - start code) and therefore lies
		between -(64K-1) and 64K-1.
		"""
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K
		# startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1
		# This means that we have a problem because we can need to assign to idDelta values
		# between -(64K-2) and 64K -1.
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid +  idDelta) % 0x10000),
		# we can get from a startCode of 0 to a final GID of 64 -1K by subtracting 1, and casting the
		# negative number to an unsigned short.
		# Similarly , we can get from a startCode of 64K-1 to a final GID of 1 by adding 2, because of
		# the modulo arithmetic.

		if idDelta > 0x7FFF:
			idDelta = idDelta - 0x10000
		elif idDelta <  -0x8000:
			# -0x8000 itself is a valid short and must be left alone; the
			# test used to be `< -0x7FFF`, which turned it into +0x8000.
			idDelta = idDelta + 0x10000

		return idDelta


	def compile(self, ttFont):
		"""Return the binary subtable built from ``self.cmap``.

		If the raw bytes are still around (the subtable was never
		decompiled) they are written back unchanged. Raises KeyError with
		the glyph name when a name cannot be resolved to a glyph ID, and
		struct.error when a value does not fit its 16-bit field.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		from fontTools.ttLib.sfnt import maxPowerOfTwo

		charCodes = list(self.cmap.keys())
		lenCharCodes = len(charCodes)
		if lenCharCodes == 0:
			startCode = [0xffff]
			endCode = [0xffff]
		else:
			charCodes.sort()
			names = [self.cmap[charCode] for charCode in charCodes]
			nameMap = ttFont.getReverseGlyphMap()
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				nameMap = ttFont.getReverseGlyphMap(rebuild=True)
				try:
					gids = [nameMap[name] for name in names]
				except KeyError:
					# allow virtual GIDs in format 4 tables
					gids = []
					for name in names:
						try:
							gid = nameMap[name]
						except KeyError:
							try:
								if (name[:3] == 'gid'):
									# NOTE(review): eval() runs on the tail of a
									# glyph name, and names can come from XML
									# input (see fromXML). Consider int() if only
									# decimal numbers need to be supported.
									gid = eval(name[3:])
								else:
									gid = ttFont.getGlyphID(name)
							except Exception:
								raise KeyError(name)

						gids.append(gid)
			cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

			# Build startCode and endCode lists.
			# Split the char codes in ranges of consecutive char codes, then split
			# each range in more ranges of consecutive/not consecutive glyph IDs.
			# See splitRange().
			lastCode = charCodes[0]
			endCode = []
			startCode = [lastCode]
			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
				if charCode == lastCode + 1:
					lastCode = charCode
					continue
				start, end = splitRange(startCode[-1], lastCode, cmap)
				startCode.extend(start)
				endCode.extend(end)
				startCode.append(charCode)
				lastCode = charCode
			endCode.append(lastCode)
			startCode.append(0xffff)
			endCode.append(0xffff)

		# build up rest of cruft
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []
		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if  (indices == list(range(indices[0], indices[0] + len(indices)))):
				# Consecutive glyph IDs: a delta is all this segment needs.
				idDeltaTemp = self.setIDDelta(indices[0] - startCode[i])
				idDelta.append( idDeltaTemp)
				idRangeOffset.append(0)
			else:
				# Otherwise the glyph IDs go into glyphIndexArray, and
				# idRangeOffset is the byte distance from this segment's
				# idRangeOffset slot to its first entry in that array.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Binary search parameters required by the format.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		maxExponent = maxPowerOfTwo(segCount)
		searchRange = 2 * (2 ** maxExponent)
		entrySelector = maxExponent
		rangeShift = 2 * segCount - searchRange

		# Pack everything big-endian with struct; array.tostring() no longer
		# exists on Python 3.9+, and this also makes byte swapping unnecessary.
		charCodeValues = endCode + [0] + startCode  # the 0 is the reservedPad field
		restValues = idRangeOffset + glyphIndexArray
		data = (struct.pack(">%dH" % len(charCodeValues), *charCodeValues)
				+ struct.pack(">%dh" % len(idDelta), *idDelta)
				+ struct.pack(">%dH" % len(restValues), *restValues))

		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.language,
				segCountX2, searchRange, entrySelector, rangeShift)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language and the "map" child elements into ``self.cmap``.

		Unlike the other formats shown in this file, a child element that
		is not called "map" trips an assertion instead of being skipped.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			nameMap, attrsMap, dummyContent = element
			if nameMap != "map":
				assert 0, "Unrecognized keyword in cmap subtable"
			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
832
833
class cmap_format_6(CmapSubtable):
	"""Format 6 cmap subtable: one glyph index per code in a contiguous range.

	After the common header the payload holds firstCode, entryCount and one
	16-bit glyph index for each code starting at firstCode.
	"""

	def decompile(self, data, ttFont):
		"""Expand the stored payload into self.cmap (char code -> glyph name).

		We usually get here indirectly from the subtable __getattr__ function,
		in which case both args must be None and the payload saved by
		decompileHeader() is used.  Someone calling decompile() directly must
		provide both data (the complete subtable) and ttFont.
		"""
		if data is not None and ttFont is not None:
			# data is already the complete subtable; hand it over unchanged.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		firstCode, entryCount = struct.unpack(">HH", data[:4])
		firstCode = int(firstCode)
		data = data[4:]
		# len(data) == 2 * entryCount does not always hold (e.g. Apple's
		# Helvetica), so decode only the complete 16-bit entries that are
		# both announced and actually present.  struct is used instead of
		# array.fromstring(), which no longer exists on Python 3.9+.
		numEntries = min(int(entryCount), len(data) // 2)
		glyphIndices = struct.unpack(">%dH" % numEntries, data[:2 * numEntries])
		self.data = data = None

		self.cmap = cmap = {}

		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in glyphIndices]
		except IndexError:
			# Some index is beyond the glyph order; let the font name them.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in glyphIndices]
		cmap.update(zip(range(firstCode, firstCode + numEntries), names))

	def compile(self, ttFont):
		"""Return the binary subtable.

		If undecompiled payload is still held in self.data it is re-emitted
		behind a rebuilt header; otherwise the subtable is built from
		self.cmap.  Codes missing inside the covered range map to ".notdef".
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		cmap = self.cmap
		if cmap: # yes, there are empty cmap tables.
			# Use min/max rather than the first/last key: dict key order is
			# not guaranteed to be sorted.
			firstCode = min(cmap)
			lastCode = max(cmap)
			getGlyphID = ttFont.getGlyphID
			gids = [getGlyphID(cmap.get(code, ".notdef"))
					for code in range(firstCode, lastCode + 1)]
			# Big-endian packing via struct; array.tostring() no longer
			# exists on Python 3.9+.
			data = struct.pack(">%dH" % len(gids), *gids)
			entryCount = len(gids)
		else:
			data = b""
			firstCode = 0
			entryCount = 0
		header = struct.pack(">HHHHH",
				6, len(data) + 10, self.language, firstCode, entryCount)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Fill self.language and self.cmap from a parsed XML element.

		Only <map> children are used; every other item in content is ignored.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			# Separate names so the method's own parameters are not rebound.
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
901
902
class cmap_format_12_or_13(CmapSubtable):
	"""Shared implementation of the format 12 and format 13 cmap subtables.

	Both store groups of (startCharCode, endCharCode, glyphID) as three
	32-bit values.  Subclasses provide _format_step, _computeGIDs() and
	_IsInSameRun(), which define how glyph IDs evolve inside one group.
	"""

	def __init__(self, format):
		self.format = format
		self.reserved = 0
		self.data = None
		self.ttFont = None

	def decompileHeader(self, data, ttFont):
		"""Parse the 16-byte header and keep the group data for lazy decompilation."""
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups
		self.data = data[16:]
		self.ttFont = ttFont

	def decompile(self, data, ttFont):
		"""Expand the stored groups into self.cmap (char code -> glyph name).

		We usually get here indirectly from the subtable __getattr__ function,
		in which case both args must be None and the payload saved by
		decompileHeader() is used.  Someone calling decompile() directly must
		provide both data (the complete subtable) and ttFont.
		"""
		if data is not None and ttFont is not None:
			# data is already the complete subtable; hand it over unchanged.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		charCodes = []
		gids = []
		pos = 0
		for i in range(self.nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
			pos += 12
			lenGroup = 1 + endCharCode - startCharCode
			charCodes.extend(range(startCharCode, endCharCode + 1))
			gids.extend(self._computeGIDs(glyphID, lenGroup))
		self.data = data = None
		self.cmap = cmap = {}
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# Some glyph ID is beyond the glyph order; let the font name them.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		cmap.update(zip(charCodes, names))

	def compile(self, ttFont):
		"""Return the binary subtable.

		If undecompiled payload is still held in self.data it is re-emitted
		behind a rebuilt header; otherwise groups are built from self.cmap.
		An empty cmap yields a header with zero groups.

		Raises KeyError (carrying the glyph name) when a glyph name cannot be
		resolved to a glyph ID.
		"""
		if self.data:
			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
		charCodes = list(self.cmap.keys())
		names = list(self.cmap.values())
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 12 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if name[:3] == 'gid':
								# Parsed with int(), deliberately not eval():
								# glyph names can come from XML input, and
								# eval() mis-reads zero-padded digits (octal
								# on Python 2, SyntaxError on Python 3).
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							raise KeyError(name)

					gids.append(gid)

		cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

		charCodes.sort()
		dataList = []
		if charCodes:
			startCharCode = charCodes[0]
			startGlyphID = cmap[startCharCode]
			# Prime the "previous" values so that the first code continues
			# the run instead of closing an empty one.
			lastGlyphID = startGlyphID - self._format_step
			lastCharCode = startCharCode - 1
			for charCode in charCodes:
				glyphID = cmap[charCode]
				if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
					dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
					startCharCode = charCode
					startGlyphID = glyphID
				lastGlyphID = glyphID
				lastCharCode = charCode
			# Close the run that was still open when the codes ran out.
			dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
		nGroups = len(dataList)
		data = bytesjoin(dataList)
		lengthSubtable = len(data) + 16
		return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

	def toXML(self, writer, ttFont):
		"""Write the subtable as an XML element named after the class.

		The individual mappings are emitted, sorted by code, through
		self._writeCodes().
		"""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Fill the header fields and self.cmap from a parsed XML element.

		Only <map> children are used; every other item in content is ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			# Separate names so the method's own parameters are not rebound.
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
1043
1044
class cmap_format_12(cmap_format_12_or_13):
	"""Group flavour in which glyph IDs rise by one per character code."""

	def __init__(self, format):
		cmap_format_12_or_13.__init__(self, format)
		# Glyph ID increment between two neighbouring codes of one group.
		self._format_step = 1

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return numberOfGlyphs consecutive glyph IDs beginning at startingGlyph."""
		return [startingGlyph + i for i in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether (charCode, glyphID) directly continues the previous pair."""
		if charCode != lastCharCode + 1:
			return False
		return glyphID == lastGlyphID + 1
1055
1056
class cmap_format_13(cmap_format_12_or_13):
	"""Group flavour in which every code of a group maps to the same glyph ID."""

	def __init__(self, format):
		cmap_format_12_or_13.__init__(self, format)
		# Glyph ID increment between two neighbouring codes of one group.
		self._format_step = 0

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return startingGlyph repeated numberOfGlyphs times."""
		repeated = [startingGlyph]
		return repeated * numberOfGlyphs

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether charCode follows lastCharCode and keeps the same glyph."""
		if charCode != lastCharCode + 1:
			return False
		return glyphID == lastGlyphID
1067
1068
def cvtToUVS(threeByteString):
	"""Decode a 3-byte big-endian string into an integer."""
	# Pad to four bytes so the value can be read as one unsigned 32-bit int.
	(value,) = struct.unpack(">L", b"\0" + threeByteString)
	return value
1073
def cvtFromUVS(val):
	"""Encode an integer below 0x1000000 as a 3-byte big-endian string."""
	assert 0 <= val < 0x1000000
	# Pack as an unsigned 32-bit int and drop the leading (always zero) byte.
	return struct.pack(">L", val)[1:]
1078
1079
class cmap_format_14(CmapSubtable):
	"""Format 14 cmap subtable: variation selector records.

	self.uvsDict maps each variation selector value to a list of
	(base code, glyph name) entries.  A glyph name of None marks an entry
	that came from the "default" list of the selector record; the other
	entries carry an explicit glyph.  self.cmap is kept as an empty dict so
	that clients expecting it on every subtable do not fail.
	"""

	def decompileHeader(self, data, ttFont):
		"""Parse the 10-byte header and keep the remaining data for decompile()."""
		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
		self.data = data[10:]
		self.length = length
		self.numVarSelectorRecords = numVarSelectorRecords
		self.ttFont = ttFont
		self.language = 0xFF # has no language.

	def decompile(self, data, ttFont):
		"""Expand the stored payload into self.uvsDict.

		Either both arguments are None, in which case the payload saved by
		decompileHeader() is used, or both data (the complete subtable) and
		ttFont must be provided.
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data

		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		uvsDict = {}
		recOffset = 0
		for n in range(self.numVarSelectorRecords):
			uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset+11])
			recOffset += 11
			varUVS = cvtToUVS(uvs)
			if defOVSOffset:
				# Offsets count from the subtable start, but self.data begins
				# after the 10-byte header.
				startOffset = defOVSOffset - 10
				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset += 4
				for r in range(numValues):
					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
					startOffset += 4
					firstBaseUV = cvtToUVS(uv)
					cnt = addtlCnt + 1
					uvsDict.setdefault(varUVS, []).extend(
							(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt))

			if nonDefUVSOffset:
				startOffset = nonDefUVSOffset - 10
				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset += 4
				localUVList = []
				for r in range(numRecs):
					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
					startOffset += 5
					uv = cvtToUVS(uv)
					glyphName = self.ttFont.getGlyphName(gid)
					localUVList.append( [uv, glyphName] )
				uvsDict.setdefault(varUVS, []).extend(localUVList)

		self.uvsDict = uvsDict

	def toXML(self, writer, ttFont):
		"""Write the subtable as an XML element with one <map> per entry.

		Entries are ordered by selector, then default entries (written with
		name "None") before explicit ones, then by base code.  Each stored
		entry list is sorted in place.
		"""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("length", self.length),
				("numVarSelectorRecords", self.numVarSelectorRecords),
				])
		writer.newline()
		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		for uvs in uvsList:
			uvList = uvsDict[uvs]
			uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
			for uv, gname in uvList:
				if gname is None:
					gname = "None"
				# I use the arg rather than the keyword syntax in order to preserve the attribute order.
				writer.simpletag("map", [ ("uvs",hex(uvs)), ("uv",hex(uv)), ("name", gname)]  )
				writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Fill the header fields and self.uvsDict from a parsed XML element.

		Only <map> children are used; a name attribute of "None" is stored
		as None.
		"""
		self.format = safeEval(attrs["format"])
		self.length = safeEval(attrs["length"])
		self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
		self.language = 0xFF # provide a value so that  CmapSubtable.__lt__() won't fail
		if not hasattr(self, "cmap"):
			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		if not hasattr(self, "uvsDict"):
			self.uvsDict = {}
		# Bound unconditionally, so an already existing uvsDict is extended
		# rather than leaving the local name undefined.
		uvsDict = self.uvsDict

		for element in content:
			if not isinstance(element, tuple):
				continue
			# Separate names so the method's own parameters are not rebound.
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			uvs = safeEval(childAttrs["uvs"])
			uv = safeEval(childAttrs["uv"])
			gname = childAttrs["name"]
			if gname == "None":
				gname = None
			uvsDict.setdefault(uvs, []).append( [uv, gname] )

	def compile(self, ttFont):
		"""Return the binary subtable.

		If undecompiled payload is still held in self.data it is re-emitted
		behind a rebuilt header; otherwise the subtable is built from
		self.uvsDict, and self.length and self.numVarSelectorRecords are
		updated to match the result.
		"""
		if self.data:
			return struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) + self.data

		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		self.numVarSelectorRecords = len(uvsList)
		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
		data = []
		varSelectorRecords = []
		for uvs in uvsList:
			entryList = uvsDict[uvs]

			defList = sorted(entry[0] for entry in entryList if entry[1] is None)
			if defList:
				defOVSOffset = offset

				# Collapse consecutive base codes into (first code, additional
				# count) records.  The count is packed as one byte, so a run
				# is also closed once it would exceed 255 additional codes.
				lastUV = defList[0]
				cnt = -1
				defRecs = []
				for defEntry in defList:
					cnt += 1
					if (lastUV + cnt) != defEntry or cnt > 255:
						defRecs.append(struct.pack(">3sB", cvtFromUVS(lastUV), cnt - 1))
						lastUV = defEntry
						cnt = 0

				defRecs.append(struct.pack(">3sB", cvtFromUVS(lastUV), cnt))

				numDefRecs = len(defRecs)
				data.append(struct.pack(">L", numDefRecs))
				data.extend(defRecs)
				offset += 4 + numDefRecs*4
			else:
				defOVSOffset = 0

			ndefList = [entry for entry in entryList if entry[1] is not None]
			if ndefList:
				nonDefUVSOffset = offset
				ndefList.sort()
				numNonDefRecs = len(ndefList)
				data.append(struct.pack(">L", numNonDefRecs))
				offset += 4 + numNonDefRecs*5

				for uv, gname in ndefList:
					gid = ttFont.getGlyphID(gname)
					data.append(struct.pack(">3sH", cvtFromUVS(uv), gid))
			else:
				nonDefUVSOffset = 0

			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
			varSelectorRecords.append(vrec)

		data = bytesjoin(varSelectorRecords) + bytesjoin(data)
		self.length = 10 + len(data)
		headerdata = struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords)
		# The result is deliberately not stored in self.data: self.data holds
		# header-less payload, so caching header + payload there would make
		# the next compile() prepend a second header.
		return headerdata + data
1254
1255
class cmap_format_unknown(CmapSubtable):
	"""Fallback for subtable formats without a dedicated class.

	The raw subtable bytes are kept in self.data and written to and read
	from XML as a hex dump.
	"""

	def toXML(self, writer, ttFont):
		"""Write the raw data as a hex dump inside a cmap_format_<format> element."""
		cmapName = self.__class__.__name__[:12] + str(self.format)
		writer.begintag(cmapName, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		writer.dumphex(self.data)
		writer.endtag(cmapName)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the raw data back from a hex dump; self.cmap is left empty."""
		self.data = readHex(content)
		self.cmap = {}

	def decompileHeader(self, data, ttFont):
		"""Store the complete subtable unparsed."""
		self.language = 0  # dummy value
		self.data = data

	def decompile(self, data, ttFont):
		"""Accept the subtable data; nothing is parsed for an unknown format.

		We usually get here indirectly from the subtable __getattr__ function,
		in which case both args must be None.  Someone calling decompile()
		directly must provide both data (the complete subtable) and ttFont.
		"""
		if data is not None and ttFont is not None:
			# data is already the complete subtable; hand it over unchanged.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

	def compile(self, ttFont):
		"""Return the stored raw data, or None when there is none."""
		if self.data:
			return self.data
		return None
1290
# Subtable format number -> class implementing that format.  Formats that are
# missing here are handled by cmap_format_unknown.
cmap_classes = dict([
		(0, cmap_format_0),
		(2, cmap_format_2),
		(4, cmap_format_4),
		(6, cmap_format_6),
		(12, cmap_format_12),
		(13, cmap_format_13),
		(14, cmap_format_14),
		])
1300