1from __future__ import print_function, division, absolute_import
2from fontTools.misc.py23 import *
3from fontTools.misc.textTools import safeEval, readHex
4from fontTools.ttLib import getSearchRange
5from fontTools.unicode import Unicode
6from . import DefaultTable
7import sys
8import struct
9import array
10import operator
11
12
class table__c_m_a_p(DefaultTable.DefaultTable):
	"""Container for the subtables of a 'cmap' table.

	After decompile() or fromXML(), self.tableVersion holds the table
	version and self.tables the list of subtable objects, each of which
	carries platformID and platEncID attributes.
	"""

	def getcmap(self, platformID, platEncID):
		"""Return the first subtable whose platformID and platEncID both
		match, or None when there is no such subtable."""
		for subtable in self.tables:
			if (subtable.platformID == platformID and
					subtable.platEncID == platEncID):
				return subtable
		return None # not found

	def decompile(self, data, ttFont):
		"""Parse the binary table 'data' into self.tableVersion and self.tables.

		Only the subtable headers are decoded here; see the note in the loop.
		Subtables whose header reports a zero length are reported on stdout
		and skipped. Encoding records that point at the same offset end up
		sharing one 'cmap' dictionary.
		"""
		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
		self.tableVersion = int(tableVersion)
		self.tables = tables = []
		# Maps a subtable offset to the first subtable object built from it.
		# The object itself is stored (not the record index) because skipped
		# zero-length records make record indices and positions in 'tables'
		# diverge.
		seenOffsets = {}
		for i in range(numSubTables):
			# The offset field is an unsigned 32-bit value, hence "L".
			platformID, platEncID, offset = struct.unpack(
					">HHL", data[4+i*8:4+(i+1)*8])
			platformID, platEncID = int(platformID), int(platEncID)
			format, length = struct.unpack(">HH", data[offset:offset+4])
			if format in [8,10,12,13]:
				# These formats have a reserved word and a 32-bit length.
				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
			elif format in [14]:
				# Format 14 has a 32-bit length directly after the format.
				format, length = struct.unpack(">HL", data[offset:offset+6])

			if not length:
				print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s,  format %s offset %s. Skipping table." % (platformID, platEncID,format, offset))
				continue
			if format not in cmap_classes:
				table = cmap_format_unknown(format)
			else:
				table = cmap_classes[format](format)
			table.platformID = platformID
			table.platEncID = platEncID
			# Note that by default we decompile only the subtable header info;
			# any other data gets decompiled only when an attribute of the
			# subtable is referenced.
			table.decompileHeader(data[offset:offset+int(length)], ttFont)
			if offset in seenOffsets:
				table.cmap = seenOffsets[offset].cmap
			else:
				seenOffsets[offset] = table
			tables.append(table)

	def compile(self, ttFont):
		"""Return the binary form of the table.

		self.tables is sorted in place first. Subtables that share a 'cmap'
		object, or that compile to identical bytes, are written once and
		referenced by several encoding records.
		"""
		self.tables.sort()    # sort according to the spec; see CmapSubtable.__lt__()
		numSubTables = len(self.tables)
		totalOffset = 4 + 8 * numSubTables  # table header plus one 8-byte record per subtable
		data = struct.pack(">HH", self.tableVersion, numSubTables)
		tableData = b""
		seen = {}  # Some tables are the same object reference. Don't compile them twice.
		done = {}  # Some tables are different objects, but compile to the same data chunk
		for table in self.tables:
			try:
				offset = seen[id(table.cmap)]
			except KeyError:
				chunk = table.compile(ttFont)
				if chunk in done:
					offset = done[chunk]
				else:
					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
					tableData = tableData + chunk
			# Unsigned 32-bit offset, matching what decompile() reads.
			data = data + struct.pack(">HHL", table.platformID, table.platEncID, offset)
		return data + tableData

	def toXML(self, writer, ttFont):
		"""Write the table version followed by every subtable to 'writer'."""
		writer.simpletag("tableVersion", version=self.tableVersion)
		writer.newline()
		for table in self.tables:
			table.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		"""Handle one child element: either 'tableVersion' or a
		'cmap_format_<N>' subtable element. Other elements are ignored."""
		if name == "tableVersion":
			self.tableVersion = safeEval(attrs["version"])
			return
		if name[:12] != "cmap_format_":
			return
		if not hasattr(self, "tables"):
			self.tables = []
		# The subtable format number is the element name's suffix.
		format = safeEval(name[12:])
		if format not in cmap_classes:
			table = cmap_format_unknown(format)
		else:
			table = cmap_classes[format](format)
		table.platformID = safeEval(attrs["platformID"])
		table.platEncID = safeEval(attrs["platEncID"])
		table.fromXML(name, attrs, content, ttFont)
		self.tables.append(table)
100
101
class CmapSubtable(object):
	"""Base class for cmap subtables, with lazy decompilation.

	decompileHeader() keeps the undecoded subtable body in self.data. The
	first access to an attribute that is not set yet (typically 'cmap')
	goes through __getattr__, which calls the subclass's decompile().
	"""

	def __init__(self, format):
		self.format = format
		self.data = None
		self.ttFont = None

	def __getattr__(self, attr):
		"""Decompile the saved data on first access to a missing attribute.

		Raises AttributeError for dunder names and when there is no saved
		data to decompile.
		"""
		# allow lazy decompilation of subtables.
		if attr[:2] == '__': # don't handle requests for member functions like '__lt__'
			raise AttributeError(attr)
		if self.data is None:
			raise AttributeError(attr)
		self.decompile(None, None) # use saved data.
		self.data = None # Once this table has been decompiled, make sure we don't
						# just return the original data. Also avoids recursion when
						# called with an attribute that the cmap subtable doesn't have.
		return getattr(self, attr)

	def decompileHeader(self, data, ttFont):
		"""Read format, length and language from the first six bytes of
		'data' and keep the remaining bytes in self.data for later.

		'data' must be exactly the subtable: its size is asserted to equal
		the length field in the header.
		"""
		format, length, language = struct.unpack(">HHH", data[:6])
		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = int(format)
		self.length = int(length)
		self.language = int(language)
		self.data = data[6:]
		self.ttFont = ttFont

	def toXML(self, writer, ttFont):
		"""Write this subtable as an element named after its class, with
		one 'map' child per character code in ascending code order."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("language", self.language),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def isUnicode(self):
		"""Return True for platform 0, or platform 3 with encoding 1 or 10."""
		return (self.platformID == 0 or
			(self.platformID == 3 and self.platEncID in [1, 10]))

	def isSymbol(self):
		"""Return True for platform 3 with encoding 0."""
		return self.platformID == 3 and self.platEncID == 0

	def _writeCodes(self, codes, writer):
		"""Write one 'map' tag per (code, name) pair; for Unicode subtables
		also add a comment taken from the Unicode lookup."""
		isUnicode = self.isUnicode()
		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			if isUnicode:
				writer.comment(Unicode[code])
			writer.newline()

	def __lt__(self, other):
		"""Order subtables by (platformID, platEncID, language).

		Returns NotImplemented when 'other' is not a CmapSubtable. Subtables
		with equal keys compare as not-less, which keeps list.sort() stable
		for them.
		"""
		if not isinstance(other, CmapSubtable):
			return NotImplemented

		# implemented so that list.sort() sorts according to the spec.
		# The instance __dict__ is deliberately not part of the key: dicts
		# cannot be ordered on Python 3, so using it as a tiebreaker raised
		# TypeError whenever two subtables shared all three IDs.
		selfTuple = (
			getattr(self, "platformID", None),
			getattr(self, "platEncID", None),
			getattr(self, "language", None))
		otherTuple = (
			getattr(other, "platformID", None),
			getattr(other, "platEncID", None),
			getattr(other, "language", None))
		return selfTuple < otherTuple
173
174
class cmap_format_0(CmapSubtable):
	"""Format 0 subtable: a byte array giving one glyph ID per character code."""

	def decompile(self, data, ttFont):
		"""Build self.cmap (character code -> glyph name).

		Call with (None, None) to decode the data saved by decompileHeader(),
		or with both the complete subtable data and the font to parse the
		header first.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# 'data' is the whole subtable; decompileHeader checks its length.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data # decompileHeader assigns the data after the header to self.data
		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
		glyphIdArray = array.array("B")
		glyphIdArray.fromstring(self.data)
		self.cmap = cmap = {}
		getGlyphName = self.ttFont.getGlyphName
		# The array index is the character code.
		for charCode, glyphID in enumerate(glyphIdArray):
			cmap[charCode] = getGlyphName(glyphID)


	def compile(self, ttFont):
		"""Return the 262-byte binary subtable.

		If undecoded data is still held it is written back unchanged;
		otherwise self.cmap must map exactly the codes 0..255.
		"""
		if self.data:
			return struct.pack(">HHH", 0, 262, self.language) + self.data

		charCodeList = sorted(self.cmap.items())
		charCodes = [entry[0] for entry in charCodeList]
		assert charCodes == list(range(256))
		valueList = [ttFont.getGlyphID(entry[1]) for entry in charCodeList]

		glyphIdArray = array.array("B", valueList)
		data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring()
		assert len(data) == 262
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every 'map' child element
		to self.cmap (created on first use)."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap
		for element in content:
			# Non-tuple items in 'content' are skipped.
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
222
223
# struct layout of one format 2 subheader record, in the order
# firstCode, entryCount, idDelta, idRangeOffset (idDelta is the signed field).
subHeaderFormat = ">HHhH"
class SubHeader(object):
	"""Plain record for one format 2 subheader and its glyph index sub-array."""
	def __init__(self):
		# The four header fields start out unset.
		self.firstCode = self.entryCount = None
		self.idDelta = self.idRangeOffset = None
		# Every instance gets its own, initially empty, list.
		self.glyphIndexArray = []
232
class cmap_format_2(CmapSubtable):
	"""Format 2 subtable: mixed one- and two-byte character codes, mapped
	through a table of subheaders selected by the first byte."""

	def setIDDelta(self, subHeader):
		"""Pick subHeader.idDelta and rebase subHeader.glyphIndexArray on it.

		After the call the smallest non-zero entry of the array is 1, and
		the original glyph ID of each non-zero entry is recovered as
		(entry + idDelta) % 0x10000. Zero entries (notdef) are untouched.
		"""
		subHeader.idDelta = 0
		# find the minGI which is not zero.
		minGI = subHeader.glyphIndexArray[0]
		for gid in subHeader.glyphIndexArray:
			if (gid != 0) and (gid < minGI):
				minGI = gid
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
		# We would like to pick an idDelta such that the first glyphArray GID is 1,
		# so that we are more likely to be able to combine glypharray GID subranges.
		# This means that we have a problem when minGI is > 32K
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid +  idDelta) % 0x10000),
		# we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
		# negative number to an unsigned short.

		if  (minGI > 1):
			# minGI - 1 still fits a signed short up to minGI == 0x8000; only
			# above that do we need the negative, wrapped-around delta.
			if  minGI > 0x8000:
				subHeader.idDelta = -(0x10000 - minGI) -1
			else:
				subHeader.idDelta =  minGI -1
			idDelta = subHeader.idDelta
			for i in range(subHeader.entryCount):
				gid = subHeader.glyphIndexArray[i]
				if gid > 0:
					# Reduce modulo 0x10000 so that the entry stays a valid
					# unsigned short when idDelta is negative.
					subHeader.glyphIndexArray[i] = (gid - idDelta) % 0x10000


	def decompile(self, data, ttFont):
		"""Build self.cmap (character code -> glyph name).

		Call with (None, None) to decode the data saved by decompileHeader(),
		or with both the complete subtable data and the font to parse the
		header first.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# 'data' is the whole subtable; decompileHeader checks its length.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		# get the key array, and determine the number of subHeaders.
		allKeys = array.array("H")
		allKeys.fromstring(data[:512])
		data = data[512:]
		if sys.byteorder != "big":
			allKeys.byteswap()
		# Keys are stored as byte offsets (subheader index * 8).
		subHeaderKeys = [ key//8 for key in allKeys]
		maxSubHeaderindex = max(subHeaderKeys)

		#Load subHeaders
		subHeaderList = []
		pos = 0
		for i in range(maxSubHeaderindex + 1):
			subHeader = SubHeader()
			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
			pos += 8
			# idRangeOffset counts from its own position, which is the last
			# two bytes of the subheader just read.
			giDataPos = pos + subHeader.idRangeOffset-2
			giList = array.array("H")
			giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount*2])
			if sys.byteorder != "big":
				giList.byteswap()
			subHeader.glyphIndexArray = giList
			subHeaderList.append(subHeader)
		# How this gets processed.
		# Charcodes may be one or two bytes.
		# The first byte of a charcode is mapped through the  subHeaderKeys, to select
		# a subHeader. For any subheader but 0, the next byte is then mapped through the
		# selected subheader. If subheader Index 0 is selected, then the byte itself is
		# mapped through the subheader, and there is no second byte.
		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
		#
		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
		# The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
		# referenced by another subheader.
		# The only subheader that will be referenced by more than one first-byte value is the subheader
		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
		# A subheader specifies a subrange within (0...256) by the
		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
		# (e.g. glyph not in font).
		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
		# Example for Logocut-Medium
		# first byte of charcode = 129; selects subheader 1.
		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
		# second byte of charCode = 66
		# the index offset = 66-64 = 2.
		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
		# [glyphIndexArray index], [subrange array index] = glyphIndex
		# [256], [0]=1 	from charcode [129, 64]
		# [257], [1]=2  	from charcode [129, 65]
		# [258], [2]=3  	from charcode [129, 66]
		# [259], [3]=4  	from charcode [129, 67]
		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
		# add it to the glyphID to get the final glyphIndex
		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

		self.data = b""
		self.cmap = cmap = {}
		for firstByte in range(256):
			subHeadindex = subHeaderKeys[firstByte]
			subHeader = subHeaderList[subHeadindex]
			if subHeadindex == 0:
				# One-byte character code: the byte itself is looked up.
				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
					continue # gi is notdef.
				else:
					charCode = firstByte
					offsetIndex = firstByte - subHeader.firstCode
					gi = subHeader.glyphIndexArray[offsetIndex]
					if gi != 0:
						gi = (gi + subHeader.idDelta) % 0x10000
					else:
						continue # gi is notdef.
				cmap[charCode] = gi
			else:
				if subHeader.entryCount:
					charCodeOffset = firstByte * 256 + subHeader.firstCode
					for offsetIndex in range(subHeader.entryCount):
						charCode = charCodeOffset + offsetIndex
						gi = subHeader.glyphIndexArray[offsetIndex]
						if gi != 0:
							gi = (gi + subHeader.idDelta) % 0x10000
						else:
							continue
						cmap[charCode] = gi
				# If not subHeader.entryCount, then all char codes with this first byte are
				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
				# same as mapping it to .notdef.
		# cmap values are GID's.
		glyphOrder = self.ttFont.getGlyphOrder()
		gids = list(cmap.values())
		charCodes = list(cmap.keys())
		lenCmap = len(gids)
		try:
			names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids ))
		except IndexError:
			# Some gid is beyond the glyph order; let the font name it.
			getGlyphName = self.ttFont.getGlyphName
			names = list(map(getGlyphName, gids ))
		# Replace the glyph IDs stored in cmap by glyph names.
		list(map(operator.setitem, [cmap]*lenCmap, charCodes, names))


	def compile(self, ttFont):
		"""Return the binary subtable.

		If undecoded data is still held it is written back unchanged;
		otherwise the subheaders and glyph index array are rebuilt from
		self.cmap. Raises KeyError for a glyph name that cannot be resolved
		to a glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		kEmptyTwoCharCodeRange = -1
		notdefGI = 0

		items = sorted(self.cmap.items())
		charCodes = [item[0] for item in items]
		names = [item[1] for item in items]
		nameMap = ttFont.getReverseGlyphMap()
		lenCharCodes = len(charCodes)
		try:
			gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names))
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names))
			except KeyError:
				# allow virtual GIDs in format 2 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								# NOTE(review): eval() runs on glyph-name text, which
								# may come from an XML dump; consider int() if only
								# plain numbers need to be supported.
								gid = eval(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							raise KeyError(name)

					gids.append(gid)

		# Process the (char code to gid) item list  in char code order.
		# By definition, all one byte char codes map to subheader 0.
		# For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0,
		# which defines all char codes in its range to map to notdef) unless proven otherwise.
		# Note that since the char code items are processed in char code order, all the char codes with the
		# same first byte are in sequential order.

		subHeaderKeys = [ kEmptyTwoCharCodeRange for x in  range(256)] # list of indices into subHeaderList.
		subHeaderList = []

		# We force this subheader entry 0  to exist in the subHeaderList in the case where some one comes up
		# with a cmap where all the one byte char codes map to notdef,
		# with the result that the subhead 0 would not get created just by processing the item list.
		charCode = charCodes[0]
		if charCode > 255:
			subHeader = SubHeader()
			subHeader.firstCode = 0
			subHeader.entryCount = 0
			subHeader.idDelta = 0
			subHeader.idRangeOffset = 0
			subHeaderList.append(subHeader)


		lastFirstByte = -1
		items = zip(charCodes, gids)
		for charCode, gid in items:
			if gid == 0:
				continue
			firstbyte = charCode >> 8
			secondByte = charCode & 0x00FF

			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
				if lastFirstByte > -1:
					# fix GI's and iDelta of current subheader.
					self.setIDDelta(subHeader)

					# If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
					# for the indices matching the char codes.
					if lastFirstByte == 0:
						for index in range(subHeader.entryCount):
							charCode = subHeader.firstCode + index
							subHeaderKeys[charCode] = 0

					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
				# init new subheader
				subHeader = SubHeader()
				subHeader.firstCode = secondByte
				subHeader.entryCount = 1
				subHeader.glyphIndexArray.append(gid)
				subHeaderList.append(subHeader)
				subHeaderKeys[firstbyte] = len(subHeaderList) -1
				lastFirstByte = firstbyte
			else:
				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
				for i in range(codeDiff):
					subHeader.glyphIndexArray.append(notdefGI)
				subHeader.glyphIndexArray.append(gid)
				subHeader.entryCount = subHeader.entryCount + codeDiff + 1

		# fix GI's and iDelta of last subheader that we we added to the subheader array.
		self.setIDDelta(subHeader)

		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
		subHeader = SubHeader()
		subHeader.firstCode = 0
		subHeader.entryCount = 0
		subHeader.idDelta = 0
		subHeader.idRangeOffset = 2
		subHeaderList.append(subHeader)
		emptySubheadIndex = len(subHeaderList) - 1
		for index in range(256):
			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
				subHeaderKeys[index] = emptySubheadIndex
		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
		# charcode 0 and GID 0.

		idRangeOffset = (len(subHeaderList)-1)*8  + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
		for index in range(subheadRangeLen):
			subHeader = subHeaderList[index]
			subHeader.idRangeOffset = 0
			for j  in range(index):
				prevSubhead = subHeaderList[j]
				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
					subHeader.glyphIndexArray = []
					break
			if subHeader.idRangeOffset == 0: # didn't find one.
				subHeader.idRangeOffset = idRangeOffset
				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
			else:
				idRangeOffset = idRangeOffset - 8  # one less subheader

		# Now we can write out the data!
		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
		for subhead in 	subHeaderList[:-1]:
			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
		dataList = [struct.pack(">HHH", 2, length, self.language)]
		for index in subHeaderKeys:
			dataList.append(struct.pack(">H", index*8))
		for subhead in 	subHeaderList:
			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
		for subhead in 	subHeaderList[:-1]:
			for gi in subhead.glyphIndexArray:
				dataList.append(struct.pack(">H", gi))
		data = bytesjoin(dataList)
		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
		return data


	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every 'map' child element
		to self.cmap (created on first use)."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			# Non-tuple items in 'content' are skipped.
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
541
542
# struct format of the format 4 subtable header as packed by
# cmap_format_4.compile(): format, length, language, segCountX2,
# searchRange, entrySelector, rangeShift (seven big-endian uint16 values).
cmap_format_4_format = ">7H"

# The header is followed by these arrays, in this order:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
551
def splitRange(startCode, endCode, cmap):
	"""Split the contiguous code range startCode..endCode into cheaper segments.

	'cmap' maps every code in the range to a glyph ID. Runs of codes whose
	glyph IDs increase by exactly one are candidates for segments of their
	own; a run is only split off when it is long enough to pay for the
	extra segment(s).

	Returns (start, end): 'end' holds the end code of every resulting
	segment, 'start' the start code of every segment except the first
	(which always starts at startCode), so len(start) + 1 == len(end).
	"""
	if startCode == endCode:
		return [], [endCode]

	# Pass 1: gather the runs in which the glyph IDs are consecutive.
	runs = []
	runStart = None  # first code of the run being built, None outside a run
	prevCode = startCode
	prevGID = cmap[startCode]
	for code in range(startCode + 1, endCode + 1):
		gid = cmap[code]
		if gid - 1 == prevGID:
			if runStart is None:
				runStart = prevCode
		elif runStart is not None:
			runs.append((runStart, prevCode))
			runStart = None
		prevGID = gid
		prevCode = code
	if runStart is not None:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Pass 2: drop the runs that would only make the data bigger.
	# A new segment costs 8 bytes, not using a new segment costs 2 bytes per
	# character.
	kept = []
	for first, last in runs:
		if first == startCode and last == endCode:
			break  # the run is the whole range; nothing to split
		if first == startCode or last == endCode:
			minLength = 4  # touching an edge costs one more segment
		else:
			minLength = 8  # an interior run costs two more segments
		if last - first + 1 > minLength:
			kept.append((first, last))

	if not kept:
		return [], [endCode]

	# Pass 3: make the list cover the range from startCode up to endCode.
	if kept[0][0] != startCode:
		kept.insert(0, (startCode, kept[0][0] - 1))
	if kept[-1][1] != endCode:
		kept.append((kept[-1][1] + 1, endCode))

	# Pass 4: fill the holes between kept runs -- those become the segments
	# in which the glyph IDs are _not_ consecutive.
	segments = [kept[0]]
	for first, last in kept[1:]:
		holeStart = segments[-1][1] + 1
		if holeStart != first:
			segments.append((holeStart, first - 1))
		segments.append((first, last))

	# The caller already knows the first start code, so it is left out.
	start = [first for first, _ in segments[1:]]
	end = [last for _, last in segments]

	assert len(start) + 1 == len(end)
	return start, end
629
630
class cmap_format_4(CmapSubtable):
	"""Format 4 subtable: 16-bit character codes described by segments."""

	def decompile(self, data, ttFont):
		"""Build self.cmap (character code -> glyph name).

		Call with (None, None) to decode the data saved by decompileHeader(),
		or with both the complete subtable data and the font to parse the
		header first.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# 'data' is the whole subtable; decompileHeader checks its length.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		(segCountX2, searchRange, entrySelector, rangeShift) = \
					struct.unpack(">4H", data[:8])
		data = data[8:]
		segCount = segCountX2 // 2

		allCodes = array.array("H")
		allCodes.fromstring(data)
		self.data = data = None

		if sys.byteorder != "big":
			allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]
		lenGIArray = len(glyphIndexArray)

		# build 2-byte character mapping
		charCodes = []
		gids = []
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			start = startCode[i]
			delta = idDelta[i]
			rangeOffset = idRangeOffset[i]
			# rangeOffset is a byte offset counted from this segment's own
			# idRangeOffset entry; 'partial' turns a character code into an
			# index into glyphIndexArray.
			partial = rangeOffset // 2 - start + i - len(idRangeOffset)

			rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
			charCodes.extend(rangeCharCodes)
			if rangeOffset == 0:
				# No glyph index array: the glyph ID is code + delta.
				gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes])
			else:
				for charCode in rangeCharCodes:
					index = charCode + partial
					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array  is not less than the length of the array (%d) !" % (i, index, lenGIArray)
					if glyphIndexArray[index] != 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + delta
					else:
						glyphID = 0  # missing glyph
					gids.append(glyphID & 0xFFFF)

		self.cmap = cmap = {}
		lenCmap = len(gids)
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids ))
		except IndexError:
			# Some gid is beyond the glyph order; let the font name it.
			getGlyphName = self.ttFont.getGlyphName
			names = list(map(getGlyphName, gids ))
		list(map(operator.setitem, [cmap]*lenCmap, charCodes, names))


	def compile(self, ttFont):
		"""Return the binary subtable.

		If undecoded data is still held it is written back unchanged;
		otherwise the segments are rebuilt from self.cmap. Raises KeyError
		for a glyph name that cannot be resolved to a glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		charCodes = list(self.cmap.keys())
		lenCharCodes = len(charCodes)
		if lenCharCodes == 0:
			# Empty mapping: only the closing 0xffff segment is written.
			startCode = [0xffff]
			endCode = [0xffff]
		else:
			charCodes.sort()
			names = list(map(operator.getitem, [self.cmap]*lenCharCodes, charCodes))
			nameMap = ttFont.getReverseGlyphMap()
			try:
				gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names))
			except KeyError:
				nameMap = ttFont.getReverseGlyphMap(rebuild=True)
				try:
					gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names))
				except KeyError:
					# allow virtual GIDs in format 4 tables
					gids = []
					for name in names:
						try:
							gid = nameMap[name]
						except KeyError:
							try:
								if (name[:3] == 'gid'):
									# NOTE(review): eval() runs on glyph-name text,
									# which may come from an XML dump; consider int()
									# if only plain numbers need to be supported.
									gid = eval(name[3:])
								else:
									gid = ttFont.getGlyphID(name)
							except Exception:
								raise KeyError(name)

						gids.append(gid)
			cmap = {}  # code:glyphID mapping
			list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids))

			# Build startCode and endCode lists.
			# Split the char codes in ranges of consecutive char codes, then split
			# each range in more ranges of consecutive/not consecutive glyph IDs.
			# See splitRange().
			lastCode = charCodes[0]
			endCode = []
			startCode = [lastCode]
			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
				if charCode == lastCode + 1:
					lastCode = charCode
					continue
				start, end = splitRange(startCode[-1], lastCode, cmap)
				startCode.extend(start)
				endCode.extend(end)
				startCode.append(charCode)
				lastCode = charCode
			endCode.append(lastCode)
			startCode.append(0xffff)
			endCode.append(0xffff)

		# build up rest of cruft
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []
		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if  (indices == list(range(indices[0], indices[0] + len(indices)))):
				# Consecutive glyph IDs: a delta alone describes the segment.
				idDelta.append((indices[0] - startCode[i]) % 0x10000)
				idRangeOffset.append(0)
			else:
				# Otherwise store the glyph IDs in glyphIndexArray and point
				# at them with a byte offset relative to this segment's own
				# idRangeOffset entry.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Header search fields derived from the segment count.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2)

		charCodeArray = array.array("H", endCode + [0] + startCode)
		idDeltaArray = array.array("H", idDelta)
		restArray = array.array("H", idRangeOffset + glyphIndexArray)
		if sys.byteorder != "big":
			charCodeArray.byteswap()
			idDeltaArray.byteswap()
			restArray.byteswap()
		data = charCodeArray.tostring() + idDeltaArray.tostring() + restArray.tostring()

		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.language,
				segCountX2, searchRange, entrySelector, rangeShift)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every 'map' child element
		to self.cmap (created on first use).

		Unlike formats 0 and 2, any other child element trips an assertion.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			# Non-tuple items in 'content' are skipped.
			if not isinstance(element, tuple):
				continue
			nameMap, attrsMap, dummyContent = element
			if nameMap != "map":
				assert 0, "Unrecognized keyword in cmap subtable"
			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
809
810
class cmap_format_6(CmapSubtable):
	"""Format 6 cmap subtable.

	After the common subtable header the data holds firstCode and
	entryCount (two unsigned 16-bit big-endian values) followed by an
	array of 16-bit glyph indices, one per consecutive character code
	starting at firstCode.
	"""

	def decompile(self, data, ttFont):
		"""Decode the glyph index array into self.cmap (code -> glyph name).

		Either both data and ttFont are None (the lazy path: the bytes saved
		by decompileHeader are used) or both are given (the header is parsed
		from data first).  Supplying only one of them trips an assertion.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# data is the complete subtable, so hand it over unsliced.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		firstCode, entryCount = struct.unpack(">HH", data[:4])
		firstCode = int(firstCode)
		data = data[4:]
		#assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
		glyphIndexArray = array.array("H")
		glyphIndexArray.fromstring(data[:2 * int(entryCount)])
		if sys.byteorder != "big":
			# the font stores big-endian values; array uses native order
			glyphIndexArray.byteswap()
		self.data = data = None

		self.cmap = cmap = {}

		charCodes = range(firstCode, firstCode + len(glyphIndexArray))
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in glyphIndexArray]
		except IndexError:
			# Some glyph index is outside the glyph order: fall back to
			# asking the font for each name.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in glyphIndexArray]
		cmap.update(zip(charCodes, names))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If undecoded data is still present it is written back behind a
		repacked header.  Otherwise the table covers every code from the
		lowest to the highest key of self.cmap; codes without an entry map
		to ".notdef".
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		cmap = self.cmap
		# Sort the keys: dict order is arbitrary, and the first and last
		# entries must be the lowest and highest character codes.
		codes = sorted(cmap.keys())
		if codes: # yes, there are empty cmap tables.
			codes = list(range(codes[0], codes[-1] + 1))
			firstCode = codes[0]
			valueList = [ttFont.getGlyphID(cmap.get(code, ".notdef")) for code in codes]
			glyphIndexArray = array.array("H", valueList)
			if sys.byteorder != "big":
				glyphIndexArray.byteswap()
			data = glyphIndexArray.tostring()
		else:
			data = b""
			firstCode = 0
		# 10 == size of the five 16-bit header fields packed below
		header = struct.pack(">HHHHH",
				6, len(data) + 10, self.language, firstCode, len(codes))
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the "language" attribute and every <map> child into self.cmap.

		Children that are not tuples, or are not <map> elements, are ignored.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _childContent = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
878
879
class cmap_format_12_or_13(CmapSubtable):
	"""Shared implementation of the format 12 and format 13 cmap subtables.

	Both formats consist of a 16-byte header followed by nGroups records of
	(startCharCode, endCharCode, glyphID), each packed as three unsigned
	32-bit big-endian values.  Subclasses provide _format_step,
	_computeGIDs() and _IsInSameRun(), which define how glyph IDs progress
	inside one group.
	"""

	def __init__(self, format):
		self.format = format
		self.reserved = 0
		self.data = None
		self.ttFont = None

	def decompileHeader(self, data, ttFont):
		"""Parse the 16-byte header; keep the group records in self.data.

		Asserts that the length of data, the header's length field and the
		size implied by nGroups all agree.
		"""
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups
		self.data = data[16:]
		self.ttFont = ttFont

	def decompile(self, data, ttFont):
		"""Expand the group records into self.cmap (code -> glyph name).

		Either both data and ttFont are None (the lazy path: the bytes saved
		by decompileHeader are used) or both are given (the header is parsed
		from data first).  Supplying only one of them trips an assertion.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# data is the complete subtable, so hand it over unsliced.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		charCodes = []
		gids = []
		pos = 0
		for i in range(self.nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
			pos += 12
			lenGroup = 1 + endCharCode - startCharCode
			charCodes.extend(range(startCharCode, endCharCode + 1))
			gids.extend(self._computeGIDs(glyphID, lenGroup))
		self.data = data = None
		self.cmap = cmap = {}
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# Some glyph ID is outside the glyph order: fall back to asking
			# the font for each name.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		cmap.update(zip(charCodes, names))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If undecoded data is still present it is written back behind a
		repacked header.  Otherwise self.cmap is converted to glyph IDs and
		packed into runs as decided by _IsInSameRun().  Glyph names of the
		form "gid<number>" that the font does not know are taken as literal
		glyph IDs.  An empty self.cmap produces a header with zero groups.

		Raises KeyError (carrying the glyph name) when a name cannot be
		resolved to a glyph ID.
		"""
		if self.data:
			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
		charCodes = list(self.cmap.keys())
		names = list(self.cmap.values())
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 12 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if name[:3] == 'gid':
								# int() instead of eval(): glyph names come
								# from font/XML input and must never be
								# executed as code.
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							raise KeyError(name)

					gids.append(gid)

		cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

		charCodes.sort()
		nGroups = 0
		dataList = []
		if charCodes: # yes, there are empty cmap tables.
			startCharCode = charCodes[0]
			startGlyphID = cmap[startCharCode]
			# Seed the "previous" values so that the very first code is seen
			# as continuing the first run rather than closing an empty one.
			lastGlyphID = startGlyphID - self._format_step
			lastCharCode = startCharCode - 1
			for charCode in charCodes:
				glyphID = cmap[charCode]
				if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
					dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
					startCharCode = charCode
					startGlyphID = glyphID
					nGroups = nGroups + 1
				lastGlyphID = glyphID
				lastCharCode = charCode
			# close the run that is still open
			dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
			nGroups = nGroups + 1
		data = bytesjoin(dataList)
		lengthSubtable = len(data) + 16
		assert len(data) == (nGroups*12) == (lengthSubtable-16)
		return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

	def toXML(self, writer, ttFont):
		"""Write the header fields as attributes and the mappings sorted by code."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the header attributes and every <map> child into self.cmap.

		Children that are not tuples, or are not <map> elements, are ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _childContent = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
1020
1021
class cmap_format_12(cmap_format_12_or_13):
	"""Format 12 flavour: glyph IDs increase by one along with the codes."""

	def __init__(self, format):
		cmap_format_12_or_13.__init__(self, format)
		# glyph ID increment between two neighbouring codes of one group
		self._format_step = 1

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return numberOfGlyphs consecutive glyph IDs from startingGlyph on."""
		stop = startingGlyph + numberOfGlyphs
		return list(range(startingGlyph, stop))

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether both the glyph ID and the code are one past the previous pair."""
		if glyphID != lastGlyphID + 1:
			return False
		return charCode == lastCharCode + 1
1032
1033
class cmap_format_13(cmap_format_12_or_13):
	"""Format 13 flavour: every code of a group shares one glyph ID."""

	def __init__(self, format):
		cmap_format_12_or_13.__init__(self, format)
		# the glyph ID does not change between neighbouring codes of a group
		self._format_step = 0

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return startingGlyph repeated numberOfGlyphs times."""
		repeated = [startingGlyph]
		return repeated * numberOfGlyphs

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether the glyph ID is unchanged and the code is one past the previous."""
		if glyphID != lastGlyphID:
			return False
		return charCode == lastCharCode + 1
1044
1045
def cvtToUVS(threeByteString):
	"""Decode a 3-byte big-endian string into an integer."""
	# Pad with a leading zero byte so the value can be read as an
	# unsigned 32-bit big-endian integer.
	return struct.unpack(">L", b"\0" + threeByteString)[0]
1050
def cvtFromUVS(val):
	"""Encode an integer as a 3-byte big-endian string.

	Asserts that val fits into 24 bits.
	"""
	assert 0 <= val < 0x1000000
	# Pack as unsigned 32-bit big-endian and drop the (zero) top byte.
	packed = struct.pack(">L", val)
	return packed[1:]
1055
1056
class cmap_format_14(CmapSubtable):
	"""Format 14 cmap subtable (variation selector records).

	The decoded content lives in self.uvsDict, which maps each variation
	selector value to a list of (uv, glyph name) pairs.  Pairs whose glyph
	name is None come from / go to the "default" table of the selector, the
	others the "non-default" table.  self.cmap is kept as an empty dict so
	that clients expecting it on a cmap subtable do not fail.
	"""

	def decompileHeader(self, data, ttFont):
		"""Parse the 10-byte header; keep the remaining bytes in self.data."""
		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
		self.data = data[10:]
		self.length = length
		self.numVarSelectorRecords = numVarSelectorRecords
		self.ttFont = ttFont
		self.language = 0xFF # has no language.

	def decompile(self, data, ttFont):
		"""Decode the variation selector records into self.uvsDict.

		Either both data and ttFont are None (the lazy path: the bytes saved
		by decompileHeader are used) or both are given (the header is parsed
		from data first).  Supplying only one of them trips an assertion.
		"""
		if data is not None and ttFont is not None:
			# Parse the header whenever both arguments are given; this must
			# not depend on whether the font is loaded lazily.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data

		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		uvsDict = {}
		recOffset = 0
		for n in range(self.numVarSelectorRecords):
			uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset+11])
			recOffset += 11
			varUVS = cvtToUVS(uvs)
			if defOVSOffset:
				# Offsets count from the subtable start, but data begins
				# after the 10-byte header.
				startOffset = defOVSOffset - 10
				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset += 4
				for r in range(numValues):
					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
					startOffset += 4
					firstBaseUV = cvtToUVS(uv)
					cnt = addtlCnt + 1
					# default entries carry no glyph name
					localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt)]
					uvsDict.setdefault(varUVS, []).extend(localUVList)

			if nonDefUVSOffset:
				startOffset = nonDefUVSOffset - 10
				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset += 4
				localUVList = []
				for r in range(numRecs):
					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
					startOffset += 5
					uv = cvtToUVS(uv)
					glyphName = self.ttFont.getGlyphName(gid)
					localUVList.append([uv, glyphName])
				uvsDict.setdefault(varUVS, []).extend(localUVList)

		self.uvsDict = uvsDict

	def toXML(self, writer, ttFont):
		"""Write one <map> element per (selector, uv, glyph name) entry.

		Entries are ordered by selector; within a selector the entries
		without a glyph name come first.  A missing glyph name is written as
		the string "None".  The per-selector lists are sorted in place.
		"""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("length", self.length),
				("numVarSelectorRecords", self.numVarSelectorRecords),
				])
		writer.newline()
		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		for uvs in uvsList:
			uvList = uvsDict[uvs]
			uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
			for uv, gname in uvList:
				if gname is None:
					gname = "None"
				# I use the arg rather than the keyword syntax in order to preserve the attribute order.
				writer.simpletag("map", [ ("uvs",hex(uvs)), ("uv",hex(uv)), ("name", gname)]  )
				writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the header attributes and every <map> child into self.uvsDict.

		A "name" attribute equal to "None" is stored as None.  Children that
		are not tuples, or are not <map> elements, are ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.length = safeEval(attrs["length"])
		self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
		self.language = 0xFF # provide a value so that  CmapSubtable.__lt__() won't fail
		if not hasattr(self, "cmap"):
			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		if not hasattr(self, "uvsDict"):
			self.uvsDict = {}
		# Bind the local name unconditionally: it is needed below even when
		# self.uvsDict already existed.
		uvsDict = self.uvsDict

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _childContent = element
			if childName != "map":
				continue
			uvs = safeEval(childAttrs["uvs"])
			uv = safeEval(childAttrs["uv"])
			gname = childAttrs["name"]
			if gname == "None":
				gname = None
			uvsDict.setdefault(uvs, []).append([uv, gname])

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If undecoded data is still present it is written back behind a
		repacked header.  Otherwise the subtable is built from self.uvsDict,
		and self.numVarSelectorRecords and self.length are updated to match
		the result.  self.data is left untouched, so calling compile() again
		yields the same bytes.
		"""
		if self.data:
			return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		self.numVarSelectorRecords = len(uvsList)
		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
		data = []
		varSelectorRecords = []
		for uvs in uvsList:
			entryList = uvsDict[uvs]

			defList = sorted(entry[0] for entry in entryList if entry[1] is None)
			if defList:
				defOVSOffset = offset

				# Collapse consecutive values into (start, additional count)
				# records.  The count is a single byte, so a run is also
				# closed once it spans 256 values.
				defRecs = []
				rangeStart = defList[0]
				extra = 0
				for defEntry in defList[1:]:
					if defEntry == rangeStart + extra + 1 and extra < 255:
						extra += 1
					else:
						defRecs.append(struct.pack(">3sB", cvtFromUVS(rangeStart), extra))
						rangeStart = defEntry
						extra = 0
				defRecs.append(struct.pack(">3sB", cvtFromUVS(rangeStart), extra))

				numDefRecs = len(defRecs)
				data.append(struct.pack(">L", numDefRecs))
				data.extend(defRecs)
				offset += 4 + numDefRecs*4
			else:
				defOVSOffset = 0

			ndefList = [entry for entry in entryList if entry[1] is not None]
			if ndefList:
				nonDefUVSOffset = offset
				ndefList.sort()
				numNonDefRecs = len(ndefList)
				data.append(struct.pack(">L", numNonDefRecs))
				offset += 4 + numNonDefRecs*5

				for uv, gname in ndefList:
					gid = ttFont.getGlyphID(gname)
					ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
					data.append(ndrec)
			else:
				nonDefUVSOffset = 0

			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
			varSelectorRecords.append(vrec)

		data = bytesjoin(varSelectorRecords) + bytesjoin(data)
		self.length = 10 + len(data)
		headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)
		# Do not store the result in self.data: the shortcut at the top would
		# then prepend a second header on the next call.
		return headerdata + data
1231
1232
class cmap_format_unknown(CmapSubtable):
	"""Fallback for subtable formats without a dedicated class.

	The subtable bytes are kept verbatim in self.data; they are dumped as
	hex to XML and returned unchanged by compile().
	"""

	def toXML(self, writer, ttFont):
		"""Write the raw subtable data as a hex dump.

		The element name is the first 12 characters of the class name
		followed by the format number.
		"""
		cmapName = self.__class__.__name__[:12] + str(self.format)
		writer.begintag(cmapName, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		writer.dumphex(self.data)
		writer.endtag(cmapName)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Restore the raw data from a hex dump; self.cmap is left empty."""
		self.data = readHex(content)
		self.cmap = {}

	def decompileHeader(self, data, ttFont):
		"""Store the whole subtable unparsed."""
		self.language = 0  # dummy value
		self.data = data

	def decompile(self, data, ttFont):
		"""Store data if both arguments are given; nothing is decoded.

		Supplying only one of data and ttFont trips an assertion.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# data is the complete subtable, so hand it over unsliced.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

	def compile(self, ttFont):
		"""Return the stored raw data, or None when there is none."""
		if self.data:
			return self.data
		else:
			return None
1267
# Maps a cmap subtable format number to the class implementing it; formats
# without an entry here are handled by cmap_format_unknown.
cmap_classes = dict([
		(0, cmap_format_0),
		(2, cmap_format_2),
		(4, cmap_format_4),
		(6, cmap_format_6),
		(12, cmap_format_12),
		(13, cmap_format_13),
		(14, cmap_format_14),
		])
1277