_c_m_a_p.py revision 3ec6a258238b6068e4eef3fe579f1f5c0a06bbba
1import sys
2from . import DefaultTable
3import struct
4import array
5import operator
6from fontTools import ttLib
7from fontTools.misc.textTools import safeEval, readHex
8from types import TupleType
9
10
class table__c_m_a_p(DefaultTable.DefaultTable):
	"""The 'cmap' table: a version number plus a list of encoding subtables.

	After decompile() or fromXML(), self.tableVersion holds the table version
	and self.tables holds the subtable objects, each carrying platformID and
	platEncID attributes.
	"""

	def getcmap(self, platformID, platEncID):
		"""Return the first subtable with the given platformID and platEncID,
		or None when there is no such subtable."""
		for subtable in self.tables:
			if (subtable.platformID == platformID and
					subtable.platEncID == platEncID):
				return subtable
		return None # not found

	def decompile(self, data, ttFont):
		"""Parse the binary table 'data' into self.tableVersion and self.tables.

		Subtables reported as having zero length are skipped with a printed
		error message. Encoding records pointing at the same offset share a
		single cmap dictionary.
		"""
		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
		self.tableVersion = int(tableVersion)
		self.tables = tables = []
		seenOffsets = {}  # subtable offset -> index into 'tables'
		for i in range(numSubTables):
			platformID, platEncID, offset = struct.unpack(
					">HHl", data[4+i*8:4+(i+1)*8])
			platformID, platEncID = int(platformID), int(platEncID)
			format, length = struct.unpack(">HH", data[offset:offset+4])
			if format in [8,10,12,13]:
				# these formats have a reserved field and a 32-bit length
				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
			elif format in [14]:
				# format 14 has a 32-bit length directly after the format
				format, length = struct.unpack(">HL", data[offset:offset+6])

			if not length:
				print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s,  format %s offset %s. Skipping table." % (platformID, platEncID,format, offset))
				continue
			if format not in cmap_classes:
				table = cmap_format_unknown(format)
			else:
				table = cmap_classes[format](format)
			table.platformID = platformID
			table.platEncID = platEncID
			# Note that by default we decompile only the subtable header info;
			# any other data gets decompiled only when an attribute of the
			# subtable is referenced.
			table.decompileHeader(data[offset:offset+int(length)], ttFont)
			if offset in seenOffsets:
				table.cmap = tables[seenOffsets[offset]].cmap
			else:
				# Remember the position in 'tables', not the record index 'i':
				# the two differ as soon as a zero-length subtable was skipped.
				seenOffsets[offset] = len(tables)
			tables.append(table)

	def compile(self, ttFont):
		"""Return the binary form of the table.

		self.tables is sorted in place first. Subtables that share one cmap
		object, or that compile to identical data, are written only once and
		referenced from several encoding records.
		"""
		self.tables.sort()    # sort according to the spec; see CmapSubtable.__cmp__()
		numSubTables = len(self.tables)
		totalOffset = 4 + 8 * numSubTables  # table header plus all encoding records
		data = struct.pack(">HH", self.tableVersion, numSubTables)
		chunks = []       # compiled subtables, in output order
		chunksLength = 0  # combined length of everything in 'chunks'
		seen = {}  # Some tables are the same object reference. Don't compile them twice.
		done = {}  # Some tables are different objects, but compile to the same data chunk
		for table in self.tables:
			try:
				offset = seen[id(table.cmap)]
			except KeyError:
				chunk = table.compile(ttFont)
				if chunk in done:
					offset = done[chunk]
				else:
					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + chunksLength
					chunks.append(chunk)
					chunksLength = chunksLength + len(chunk)
			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
		return data + "".join(chunks)

	def toXML(self, writer, ttFont):
		"""Write the table version and then every subtable to 'writer'."""
		writer.simpletag("tableVersion", version=self.tableVersion)
		writer.newline()
		for table in self.tables:
			table.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		"""Handle one XML child element of the table.

		A "tableVersion" element sets self.tableVersion; an element whose name
		starts with "cmap_format_" is turned into a subtable and appended to
		self.tables. Any other element is ignored.
		"""
		if name == "tableVersion":
			self.tableVersion = safeEval(attrs["version"])
			return
		if name[:12] != "cmap_format_":
			return
		if not hasattr(self, "tables"):
			self.tables = []
		format = safeEval(name[12:])  # the part of the name after "cmap_format_"
		if format not in cmap_classes:
			table = cmap_format_unknown(format)
		else:
			table = cmap_classes[format](format)
		table.platformID = safeEval(attrs["platformID"])
		table.platEncID = safeEval(attrs["platEncID"])
		table.fromXML(name, attrs, content, ttFont)
		self.tables.append(table)
98
99
class CmapSubtable:
	"""Base class of the cmap subtables; provides lazy decompilation.

	decompileHeader() keeps the undecoded part of the subtable in self.data.
	The first access to an attribute that is not set yet then runs the
	subclass's decompile() on that saved data.
	"""

	def __init__(self, format):
		self.format = format
		self.data = None
		self.ttFont = None

	def __getattr__(self, attr):
		"""Decompile the saved data on demand, then return the attribute.

		Raises AttributeError for '__xxx' names and when there is no saved
		data to decompile.
		"""
		# allow lazy decompilation of subtables.
		if attr[:2] == '__': # don't handle requests for member functions like '__lt__'
			raise AttributeError(attr)
		# Read 'data' from the instance dict: going through self.data would
		# re-enter __getattr__ endlessly on an instance that never had its
		# 'data' attribute set (e.g. one created without calling __init__).
		if self.__dict__.get("data") is None:
			raise AttributeError(attr)
		self.decompile(None, None) # use saved data.
		self.data = None # Once this table has been decompiled, make sure we don't
						# just return the original data. Also avoids recursion when
						# called with an attribute that the cmap subtable doesn't have.
		return getattr(self, attr)

	def decompileHeader(self, data, ttFont):
		"""Read format, length and language from the first 6 bytes of 'data'
		and save the remaining bytes in self.data for later decompilation.

		'data' must be exactly as long as the length given in its header.
		"""
		format, length, language = struct.unpack(">HHH", data[:6])
		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = int(format)
		self.length = int(length)
		self.language = int(language)
		self.data = data[6:]
		self.ttFont = ttFont

	def toXML(self, writer, ttFont):
		"""Write the subtable as an element named after its class, holding one
		"map" element per entry of self.cmap in sorted order."""
		tagName = self.__class__.__name__
		writer.begintag(tagName, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("language", self.language),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(tagName)
		writer.newline()

	def _writeCodes(self, codes, writer):
		"""Write a "map" element for each (code, name) pair in 'codes'; for
		platform 0 and for platform 3 with encoding 1 or 10 a comment looked
		up in fontTools.unicode.Unicode is added as well."""
		isUnicode = ((self.platformID, self.platEncID) in [(3, 1), (3, 10)]
				or self.platformID == 0)
		if isUnicode:
			from fontTools.unicode import Unicode
		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			if isUnicode:
				writer.comment(Unicode[code])
			writer.newline()

	def __cmp__(self, other):
		"""Order subtables by (platformID, platEncID, language, __dict__)."""
		if not isinstance(self, type(other)): return cmp(type(self), type(other))

		# implemented so that list.sort() sorts according to the cmap spec.
		selfTuple = (
			getattr(self, "platformID", None),
			getattr(self, "platEncID", None),
			getattr(self, "language", None),
			self.__dict__)
		otherTuple = (
			getattr(other, "platformID", None),
			getattr(other, "platEncID", None),
			getattr(other, "language", None),
			other.__dict__)
		return cmp(selfTuple, otherTuple)
167
168
class cmap_format_0(CmapSubtable):
	"""Format 0 subtable: a byte array holding one glyph ID per character code."""

	def decompile(self, data, ttFont):
		"""Build self.cmap (character code -> glyph name) from the subtable data.

		We usually get here indirectly from the subtable __getattr__ function,
		in which case both args must be None and the data saved by
		decompileHeader() is used. If not, someone is calling the subtable
		decompile() directly, and must provide both args; 'data' is then the
		whole subtable, header included.
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data # decompileHeader assigns the data after the header to self.data
		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
		glyphIdArray = array.array("B")
		glyphIdArray.fromstring(data)
		getGlyphName = self.ttFont.getGlyphName
		# the position of a glyph ID in the array is its character code
		names = [getGlyphName(glyphID) for glyphID in glyphIdArray]
		self.cmap = dict(zip(range(len(names)), names))

	def compile(self, ttFont):
		"""Return the 262-byte binary subtable.

		Data that has not been decompiled yet is written back unchanged;
		otherwise self.cmap must map exactly the character codes 0..255.
		"""
		if self.data:
			return struct.pack(">HHH", 0, 262, self.language) + self.data

		charCodeList = sorted(self.cmap.items())
		charCodes = [entry[0] for entry in charCodeList]
		assert charCodes == list(range(256))
		glyphIDs = [ttFont.getGlyphID(entry[1]) for entry in charCodeList]

		glyphIdArray = array.array("B", glyphIDs)
		data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring()
		assert len(data) == 262
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Set self.language from 'attrs' and add each "map" child element in
		'content' to self.cmap; other content is ignored."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap
		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
216
217
# struct format of one format 2 subheader, in the order
# firstCode, entryCount, idDelta, idRangeOffset.
subHeaderFormat = ">HHhH"
class SubHeader:
	"""Plain record for one subheader of a format 2 cmap subtable."""

	def __init__(self):
		# the four packed fields are unknown until somebody fills them in
		self.firstCode = self.entryCount = self.idDelta = self.idRangeOffset = None
		# every instance gets its own list of glyph indices
		self.glyphIndexArray = []
226
class cmap_format_2(CmapSubtable):
	"""Format 2 subtable: mixed one-byte / two-byte character codes that are
	mapped through a list of subheaders."""

	def setIDDelta(self, subHeader):
		"""Choose subHeader.idDelta and rebase the non-zero entries of
		subHeader.glyphIndexArray on it, in place.

		A subheader without any non-zero glyph index is left with idDelta 0.
		"""
		subHeader.idDelta = 0
		# find the minGI which is not zero.
		nonZeroGIs = [gid for gid in subHeader.glyphIndexArray if gid != 0]
		if not nonZeroGIs:
			return
		minGI = min(nonZeroGIs)
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
		# We would like to pick an idDelta such that the first glyphArray GID is 1,
		# so that we are more likely to be able to combine glypharray GID subranges.
		# This means that we have a problem when minGI is > 32K
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid +  idDelta) % 0x10000),
		# we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
		# negative number to an unsigned short.

		if minGI > 1:
			if minGI - 1 > 0x7FFF:
				# minGI - 1 does not fit in a signed short; use the
				# equivalent negative delta (at least -0x8000) instead.
				subHeader.idDelta = -(0x10000 - minGI) - 1
			else:
				subHeader.idDelta = minGI - 1
			idDelta = subHeader.idDelta
			for i in range(subHeader.entryCount):
				gid = subHeader.glyphIndexArray[i]
				if gid > 0:
					# The modulo keeps the stored value an unsigned short when
					# idDelta is negative; decompile() undoes it with
					# (gi + idDelta) % 0x10000.
					subHeader.glyphIndexArray[i] = (gid - idDelta) % 0x10000


	def decompile(self, data, ttFont):
		"""Build self.cmap (character code -> glyph name) from the subtable data.

		We usually get here indirectly from the subtable __getattr__ function,
		in which case both args must be None and the data saved by
		decompileHeader() is used. If not, someone is calling the subtable
		decompile() directly, and must provide both args; 'data' is then the
		whole subtable, header included.
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		subHeaderKeys = []
		maxSubHeaderindex = 0
		# get the key array, and determine the number of subHeaders.
		allKeys = array.array("H")
		allKeys.fromstring(data[:512])
		data = data[512:]
		if sys.byteorder != "big":
			allKeys.byteswap()
		# the keys are byte offsets into the array of 8-byte subheaders
		subHeaderKeys = [ key//8 for key in allKeys]
		maxSubHeaderindex = max(subHeaderKeys)

		#Load subHeaders
		subHeaderList = []
		pos = 0
		for i in range(maxSubHeaderindex + 1):
			subHeader = SubHeader()
			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
			pos += 8
			# idRangeOffset counts from the idRangeOffset word itself, which is
			# the last two bytes of the subheader just read.
			giDataPos = pos + subHeader.idRangeOffset-2
			giList = array.array("H")
			giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount*2])
			if sys.byteorder != "big":
				giList.byteswap()
			subHeader.glyphIndexArray = giList
			subHeaderList.append(subHeader)
		# How this gets processed.
		# Charcodes may be one or two bytes.
		# The first byte of a charcode is mapped through the  subHeaderKeys, to select
		# a subHeader. For any subheader but 0, the next byte is then mapped through the
		# selected subheader. If subheader Index 0 is selected, then the byte itself is
		# mapped through the subheader, and there is no second byte.
		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
		#
		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
		# The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
		# referenced by another subheader.
		# The only subheader that will be referenced by more than one first-byte value is the subheader
		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
		# A subheader specifies a subrange within (0...256) by the
		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
		# (e.g. glyph not in font).
		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
		# Example for Logocut-Medium
		# first byte of charcode = 129; selects subheader 1.
		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
		# second byte of charCode = 66
		# the index offset = 66-64 = 2.
		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
		# [glyphIndexArray index], [subrange array index] = glyphIndex
		# [256], [0]=1 	from charcode [129, 64]
		# [257], [1]=2  	from charcode [129, 65]
		# [258], [2]=3  	from charcode [129, 66]
		# [259], [3]=4  	from charcode [129, 67]
		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
		# add it to the glyphID to get the final glyphIndex
		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

		self.data = ""
		self.cmap = cmap = {}
		notdefGI = 0
		for firstByte in range(256):
			subHeadindex = subHeaderKeys[firstByte]
			subHeader = subHeaderList[subHeadindex]
			if subHeadindex == 0:
				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
					continue # gi is notdef.
				else:
					charCode = firstByte
					offsetIndex = firstByte - subHeader.firstCode
					gi = subHeader.glyphIndexArray[offsetIndex]
					if gi != 0:
						gi = (gi + subHeader.idDelta) % 0x10000
					else:
						continue # gi is notdef.
				cmap[charCode] = gi
			else:
				if subHeader.entryCount:
					charCodeOffset = firstByte * 256 + subHeader.firstCode
					for offsetIndex in range(subHeader.entryCount):
						charCode = charCodeOffset + offsetIndex
						gi = subHeader.glyphIndexArray[offsetIndex]
						if gi != 0:
							gi = (gi + subHeader.idDelta) % 0x10000
						else:
							continue
						cmap[charCode] = gi
				# If not subHeader.entryCount, then all char codes with this first byte are
				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
				# same as mapping it to .notdef.
		# cmap values are GID's; replace them by glyph names.
		glyphOrder = self.ttFont.getGlyphOrder()
		charCodes = list(cmap.keys())
		gids = [cmap[charCode] for charCode in charCodes]
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# some gid is outside the glyph order; ask the font for every name
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		for charCode, name in zip(charCodes, names):
			cmap[charCode] = name


	def compile(self, ttFont):
		"""Return the binary subtable built from self.cmap.

		Data that has not been decompiled yet is written back unchanged.
		Glyph names that the font does not know raise KeyError, except that a
		name of the form 'gid<expression>' is accepted as a virtual glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		kEmptyTwoCharCodeRange = -1
		notdefGI = 0

		items = sorted(self.cmap.items())
		charCodes = [item[0] for item in items]
		names = [item[1] for item in items]
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=1)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 2 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								# NOTE(review): eval() runs whatever follows
								# 'gid' in the glyph name; unsafe if the names
								# come from an untrusted source.
								gid = eval(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							raise KeyError(name)

					gids.append(gid)

		# Process the (char code to gid) item list  in char code order.
		# By definition, all one byte char codes map to subheader 0.
		# For all the two byte char codes, we assume that the first byte maps to the empty subhead (with an entry count of 0,
		# which defines all char codes in its range to map to notdef) unless proven otherwise.
		# Note that since the char code items are processed in char code order, all the char codes with the
		# same first byte are in sequential order.

		subHeaderKeys = [ kEmptyTwoCharCodeRange for x in  range(256)] # list of indices into subHeaderList.
		subHeaderList = []

		# We force this subheader entry 0  to exist in the subHeaderList in the case where some one comes up
		# with a cmap where all the one byte char codes map to notdef,
		# with the result that the subhead 0 would not get created just by processing the item list.
		charCode = charCodes[0]
		if charCode > 255:
			subHeader = SubHeader()
			subHeader.firstCode = 0
			subHeader.entryCount = 0
			subHeader.idDelta = 0
			subHeader.idRangeOffset = 0
			subHeaderList.append(subHeader)


		lastFirstByte = -1
		items = zip(charCodes, gids)
		for charCode, gid in items:
			if gid == 0:
				continue
			firstbyte = charCode >> 8
			secondByte = charCode & 0x00FF

			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
				if lastFirstByte > -1:
					# fix GI's and iDelta of current subheader.
					self.setIDDelta(subHeader)

					# If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
					# for the indices matching the char codes.
					if lastFirstByte == 0:
						for index in range(subHeader.entryCount):
							oneByteCode = subHeader.firstCode + index
							subHeaderKeys[oneByteCode] = 0

					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
				# init new subheader
				subHeader = SubHeader()
				subHeader.firstCode = secondByte
				subHeader.entryCount = 1
				subHeader.glyphIndexArray.append(gid)
				subHeaderList.append(subHeader)
				subHeaderKeys[firstbyte] = len(subHeaderList) -1
				lastFirstByte = firstbyte
			else:
				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
				for i in range(codeDiff):
					subHeader.glyphIndexArray.append(notdefGI)
				subHeader.glyphIndexArray.append(gid)
				subHeader.entryCount = subHeader.entryCount + codeDiff + 1

		# fix GI's and iDelta of last subheader that we added to the subheader array.
		self.setIDDelta(subHeader)

		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
		subHeader = SubHeader()
		subHeader.firstCode = 0
		subHeader.entryCount = 0
		subHeader.idDelta = 0
		subHeader.idRangeOffset = 2
		subHeaderList.append(subHeader)
		emptySubheadIndex = len(subHeaderList) - 1
		for index in range(256):
			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
				subHeaderKeys[index] = emptySubheadIndex
		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
		# charcode 0 and GID 0.

		idRangeOffset = (len(subHeaderList)-1)*8  + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
		for index in range(subheadRangeLen):
			subHeader = subHeaderList[index]
			subHeader.idRangeOffset = 0
			for j  in range(index):
				prevSubhead = subHeaderList[j]
				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
					subHeader.glyphIndexArray = []
					break
			if subHeader.idRangeOffset == 0: # didn't find one.
				subHeader.idRangeOffset = idRangeOffset
				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
			else:
				idRangeOffset = idRangeOffset - 8  # one less subheader

		# Now we can write out the data!
		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
		for subhead in 	subHeaderList[:-1]:
			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
		dataList = [struct.pack(">HHH", 2, length, self.language)]
		for index in subHeaderKeys:
			dataList.append(struct.pack(">H", index*8))
		for subhead in 	subHeaderList:
			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
		for subhead in 	subHeaderList[:-1]:
			for gi in subhead.glyphIndexArray:
				dataList.append(struct.pack(">H", gi))
		data = "".join(dataList)
		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
		return data


	def fromXML(self, name, attrs, content, ttFont):
		"""Set self.language from 'attrs' and add each "map" child element in
		'content' to self.cmap; other content is ignored."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
535
536
# struct format of the fixed part of a format 4 subtable as packed by
# cmap_format_4.compile(): format, length, language, segCountX2, searchRange,
# entrySelector, rangeShift.
cmap_format_4_format = ">7H"

# The arrays that follow the fixed part:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
545
def splitRange(startCode, endCode, cmap):
	"""Split the code range startCode..endCode into cmap4 segments.

	'cmap' maps every character code in the range to its glyph ID. Runs of
	codes with consecutive glyph IDs become segments of their own when that
	is expected to make the subtable smaller. Returns (start, end): 'end'
	lists the last code of every resulting segment and 'start' the first
	code of every segment but the first, so 'end' is one item longer.
	"""
	if startCode == endCode:
		return [], [endCode]

	# Step 1: collect the runs of codes whose glyph IDs go up by exactly one.
	runs = []
	runStart = None  # first code of the run in progress; None outside a run
	prevCode = startCode
	prevGID = cmap[startCode]
	for code in range(startCode + 1, endCode + 1):
		gid = cmap[code]
		if gid == prevGID + 1:
			if runStart is None:
				runStart = prevCode
		elif runStart is not None:
			runs.append((runStart, prevCode))
			runStart = None
		prevCode, prevGID = code, gid
	if runStart is not None:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Step 2: drop the runs that would only make the data bigger. A new
	# segment costs 8 bytes; not using one costs 2 bytes per character.
	kept = []
	for first, last in runs:
		if first == startCode and last == endCode:
			break  # the run is the whole range, no split needed
		if first == startCode or last == endCode:
			minimum = 4  # splitting adds one segment
		else:
			minimum = 8  # splitting adds two segments
		if last - first + 1 > minimum:
			kept.append((first, last))

	if not kept:
		return [], [endCode]

	# Step 3: make the list reach both ends of the range.
	if kept[0][0] != startCode:
		kept.insert(0, (startCode, kept[0][0] - 1))
	if kept[-1][1] != endCode:
		kept.append((kept[-1][1] + 1, endCode))

	# Step 4: fill the gaps between neighbours -- those are the segments in
	# which the glyph IDs are not consecutive.
	segments = kept[:1]
	for first, last in kept[1:]:
		gapStart = segments[-1][1] + 1
		if gapStart != first:
			segments.append((gapStart, first - 1))
		segments.append((first, last))

	# Step 5: turn the segments into start/end lists; the caller already
	# has the start of the first segment.
	start = [first for first, last in segments[1:]]
	end = [last for first, last in segments]

	assert len(start) + 1 == len(end)
	return start, end
623
624
class cmap_format_4(CmapSubtable):
	"""Format 4 subtable: two-byte character codes stored as segments."""

	def decompile(self, data, ttFont):
		"""Build self.cmap (character code -> glyph name) from the subtable data.

		We usually get here indirectly from the subtable __getattr__ function,
		in which case both args must be None and the data saved by
		decompileHeader() is used. If not, someone is calling the subtable
		decompile() directly, and must provide both args; 'data' is then the
		whole subtable, header included.
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		(segCountX2, searchRange, entrySelector, rangeShift) = \
					struct.unpack(">4H", data[:8])
		data = data[8:]
		segCount = segCountX2 // 2

		allCodes = array.array("H")
		allCodes.fromstring(data)
		self.data = data = None

		if sys.byteorder != "big":
			allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]
		lenGIArray = len(glyphIndexArray)

		# build 2-byte character mapping
		charCodes = []
		gids = []
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
			charCodes.extend(rangeCharCodes)
			for charCode in rangeCharCodes:
				rangeOffset = idRangeOffset[i]
				if rangeOffset == 0:
					glyphID = charCode + idDelta[i]
				else:
					# idRangeOffset is a byte offset counted from its own
					# position in the idRangeOffset array; turn it into an
					# index into glyphIndexArray.
					index = idRangeOffset[i] // 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array  is not less than the length of the array (%d) !" % (i, index, lenGIArray)
					if glyphIndexArray[index] != 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + idDelta[i]
					else:
						glyphID = 0  # missing glyph
				gids.append(glyphID % 0x10000)

		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# some gid is outside the glyph order; ask the font for every name
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		self.cmap = cmap = {}
		for charCode, name in zip(charCodes, names):
			cmap[charCode] = name



	def setIDDelta(self, idDelta):
		"""Return 'idDelta' wrapped into the signed 16-bit range
		-0x8000..0x7FFF, adding or subtracting 0x10000 when needed."""
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K
		# startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1
		# This means that we have a problem because we can need to assign to idDelta values
		# between -(64K-2) and 64K -1.
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid +  idDelta) % 0x10000),
		# we can get from a startCode of 0 to a final GID of 64 -1K by subtracting 1, and casting the
		# negative number to an unsigned short.
		# Similarly , we can get from a startCode of 64K-1 to a final GID of 1 by adding 2, because of
		# the modulo arithmetic.

		if idDelta > 0x7FFF:
			idDelta = idDelta - 0x10000
		elif idDelta < -0x8000:
			# -0x8000 itself is a valid short and must be left alone:
			# wrapping it would give +0x8000, which is out of range.
			idDelta = idDelta + 0x10000

		return idDelta


	def compile(self, ttFont):
		"""Return the binary subtable built from self.cmap.

		Data that has not been decompiled yet is written back unchanged.
		Glyph names that the font does not know raise KeyError, except that a
		name of the form 'gid<expression>' is accepted as a virtual glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		from fontTools.ttLib.sfnt import maxPowerOfTwo

		charCodes = sorted(self.cmap.keys())
		if not charCodes:
			# an empty cmap still needs the closing 0xffff segment
			startCode = [0xffff]
			endCode = [0xffff]
		else:
			names = [self.cmap[charCode] for charCode in charCodes]
			nameMap = ttFont.getReverseGlyphMap()
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				nameMap = ttFont.getReverseGlyphMap(rebuild=1)
				try:
					gids = [nameMap[name] for name in names]
				except KeyError:
					# allow virtual GIDs in format 4 tables
					gids = []
					for name in names:
						try:
							gid = nameMap[name]
						except KeyError:
							try:
								if (name[:3] == 'gid'):
									# NOTE(review): eval() runs whatever follows
									# 'gid' in the glyph name; unsafe if the names
									# come from an untrusted source.
									gid = eval(name[3:])
								else:
									gid = ttFont.getGlyphID(name)
							except Exception:
								raise KeyError(name)

						gids.append(gid)
			cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

			# Build startCode and endCode lists.
			# Split the char codes in ranges of consecutive char codes, then split
			# each range in more ranges of consecutive/not consecutive glyph IDs.
			# See splitRange().
			lastCode = charCodes[0]
			endCode = []
			startCode = [lastCode]
			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
				if charCode == lastCode + 1:
					lastCode = charCode
					continue
				start, end = splitRange(startCode[-1], lastCode, cmap)
				startCode.extend(start)
				endCode.extend(end)
				startCode.append(charCode)
				lastCode = charCode
			endCode.append(lastCode)
			startCode.append(0xffff)
			endCode.append(0xffff)

		# build up rest of cruft
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []
		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if  (indices == list(range(indices[0], indices[0] + len(indices)))):
				# consecutive glyph IDs: a delta is enough for this segment
				idDeltaTemp = self.setIDDelta(indices[0] - startCode[i])
				idDelta.append( idDeltaTemp)
				idRangeOffset.append(0)
			else:
				# glyph IDs go into glyphIndexArray; the offset is counted in
				# bytes from this segment's own idRangeOffset entry.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# binary search helper fields of the subtable header
		segCount = len(endCode)
		segCountX2 = segCount * 2
		maxExponent = maxPowerOfTwo(segCount)
		searchRange = 2 * (2 ** maxExponent)
		entrySelector = maxExponent
		rangeShift = 2 * segCount - searchRange

		charCodeArray = array.array("H", endCode + [0] + startCode)
		idDeltaeArray = array.array("h", idDelta)
		restArray = array.array("H", idRangeOffset + glyphIndexArray)
		if sys.byteorder != "big":
			charCodeArray.byteswap()
			idDeltaeArray.byteswap()
			restArray.byteswap()
		data = charCodeArray.tostring() + idDeltaeArray.tostring() + restArray.tostring()

		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.language,
				segCountX2, searchRange, entrySelector, rangeShift)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Set self.language from 'attrs' and add each "map" child element in
		'content' to self.cmap; any other child element fails an assertion."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			nameMap, attrsMap, dummyContent = element
			if nameMap != "map":
				assert 0, "Unrecognized keyword in cmap subtable"
			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
827
828
class cmap_format_6(CmapSubtable):
	"""cmap subtable format 6: one contiguous run of character codes.

	The binary form is a first code, an entry count and an array of
	16-bit glyph indices; self.cmap maps character code -> glyph name.
	"""

	def decompile(self, data, ttFont):
		"""Expand the raw subtable bytes into self.cmap.

		Both arguments are None when called through the lazy attribute
		lookup (the header was parsed earlier); a direct caller must
		supply both the subtable data and the font.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# The caller hands us this subtable's own bytes, so they are passed on unsliced.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		firstCode, entryCount = struct.unpack(">HH", data[:4])
		firstCode = int(firstCode)
		data = data[4:]
		#assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
		glyphIndexArray = array.array("H")
		glyphIndexArray.fromstring(data[:2 * int(entryCount)])
		if sys.byteorder != "big":
			# The font stores big-endian values; array uses host order.
			glyphIndexArray.byteswap()
		self.data = data = None

		self.cmap = cmap = {}

		charCodes = range(firstCode, firstCode + len(glyphIndexArray))
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in glyphIndexArray]
		except IndexError:
			# Some glyph ID lies beyond the glyph order; let the font name each one.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in glyphIndexArray]
		cmap.update(zip(charCodes, names))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If undecompiled raw data is still held, it is re-emitted behind a
		rebuilt header. Otherwise a dense glyph index array is written
		from the lowest to the highest mapped code, with unmapped codes in
		between pointing at ".notdef".
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		cmap = self.cmap
		# Sort explicitly: dict key order is not guaranteed to be ascending,
		# and the first/last elements are used as the code range below.
		codes = sorted(cmap)
		if codes: # yes, there are empty cmap tables.
			firstCode = codes[0]
			codes = list(range(firstCode, codes[-1] + 1))
			valueList = [ttFont.getGlyphID(cmap.get(code, ".notdef")) for code in codes]
			glyphIndexArray = array.array("H", valueList)
			if sys.byteorder != "big":
				glyphIndexArray.byteswap()
			data = glyphIndexArray.tostring()
		else:
			data = b""
			firstCode = 0
		# 10 == size of the five 16-bit header fields packed here.
		header = struct.pack(">HHHHH",
				6, len(data) + 10, self.language, firstCode, len(codes))
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Fill self.cmap from the <map> children of the parsed XML element.

		Non-tuple items and elements with a tag other than "map" are ignored.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
896
897
class cmap_format_12_or_13(CmapSubtable):
	"""Shared implementation for cmap subtable formats 12 and 13.

	Both formats store groups of (startCharCode, endCharCode, glyphID)
	as three 32-bit values. Subclasses provide self._format_step,
	_computeGIDs() and _IsInSameRun(), which define how glyph IDs
	progress inside a group.
	"""

	def __init__(self, format):
		self.format = format
		self.reserved = 0
		self.data = None
		self.ttFont = None

	def decompileHeader(self, data, ttFont):
		"""Parse the 16-byte header and keep the group records in self.data."""
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups
		self.data = data[16:]
		self.ttFont = ttFont

	def decompile(self, data, ttFont):
		"""Expand the group records into self.cmap (character code -> glyph name).

		Both arguments are None when called through the lazy attribute
		lookup; a direct caller must supply both the subtable data and the
		font.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# The caller hands us this subtable's own bytes, so they are passed on unsliced.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		charCodes = []
		gids = []
		pos = 0
		for i in range(self.nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
			pos += 12
			lenGroup = 1 + endCharCode - startCharCode
			charCodes.extend(range(startCharCode, endCharCode + 1))
			gids.extend(self._computeGIDs(glyphID, lenGroup))
		self.data = data = None
		self.cmap = cmap = {}
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# Some glyph ID lies beyond the glyph order; let the font name each one.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		cmap.update(zip(charCodes, names))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If undecompiled raw data is still held, it is re-emitted behind a
		rebuilt header. Otherwise self.cmap is converted to glyph IDs and
		packed into groups, starting a new group whenever
		self._IsInSameRun() reports a break. An empty cmap yields a
		header with zero groups.

		Raises KeyError(name) for a glyph name that cannot be resolved.
		"""
		if self.data:
			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
		# Real lists are needed: the codes are sorted in place further down.
		charCodes = list(self.cmap.keys())
		names = list(self.cmap.values())
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=1)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 12 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								# NOTE(review): eval() runs on a glyph name that may come
								# from an untrusted TTX file -- consider int() instead.
								gid = eval(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							raise KeyError(name)

					gids.append(gid)

		cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

		charCodes.sort()
		dataList = []
		if charCodes:
			startCharCode = charCodes[0]
			startGlyphID = cmap[startCharCode]
			# Prime the "previous" values so that the very first code is
			# seen as continuing the (still empty) first group.
			lastGlyphID = startGlyphID - self._format_step
			lastCharCode = startCharCode - 1
			for charCode in charCodes:
				glyphID = cmap[charCode]
				if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
					dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
					startCharCode = charCode
					startGlyphID = glyphID
				lastGlyphID = glyphID
				lastCharCode = charCode
			# Flush the group that was still open when the codes ran out.
			dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
		nGroups = len(dataList)
		data = b"".join(dataList)
		lengthSubtable = len(data) + 16
		return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

	def toXML(self, writer, ttFont):
		"""Write the subtable header attributes and its sorted code mappings."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Restore header fields and self.cmap from the parsed XML element.

		Non-tuple items and elements with a tag other than "map" are ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
1038
1039
class cmap_format_12(cmap_format_12_or_13):
	"""Format 12 flavour: within a group the glyph ID grows by one per character code."""

	def __init__(self, format):
		cmap_format_12_or_13.__init__(self, format)
		# Glyph ID distance between two neighbouring codes of one group.
		self._format_step = 1

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return numberOfGlyphs consecutive glyph IDs beginning at startingGlyph."""
		stop = startingGlyph + numberOfGlyphs
		gids = list(range(startingGlyph, stop))
		return gids

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether both the glyph ID and the code directly follow the previous pair."""
		if glyphID != lastGlyphID + 1:
			return False
		return charCode == lastCharCode + 1
1050
1051
class cmap_format_13(cmap_format_12_or_13):
	"""Format 13 flavour: every character code of a group shares one glyph ID."""

	def __init__(self, format):
		cmap_format_12_or_13.__init__(self, format)
		# Glyph ID distance between two neighbouring codes of one group.
		self._format_step = 0

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return a list holding startingGlyph repeated numberOfGlyphs times."""
		return [startingGlyph for _ in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether the glyph ID is unchanged and the code directly follows the previous one."""
		if glyphID != lastGlyphID:
			return False
		return charCode == lastCharCode + 1
1062
1063
def  cvtToUVS(threeByteString):
	"""Decode a 3-byte big-endian value into an integer.

	The bytes are widened to four by prepending a zero byte and read as
	a big-endian unsigned long. The ">L" format is big-endian on every
	host, so the padding never depends on sys.byteorder.
	"""
	data = b"\0" + threeByteString
	val, = struct.unpack(">L", data)
	return val
1071
def  cvtFromUVS(val):
	"""Encode an integer as a 3-byte big-endian string.

	The value is packed as a big-endian unsigned long and the leading
	(most significant) byte is dropped. The ">L" format is big-endian
	on every host, so the byte to drop never depends on sys.byteorder.
	"""
	fourByteString = struct.pack(">L", val)
	return fourByteString[1:]
1078
def cmpUVSListEntry(first, second):
	"""Three-way comparison of two (code point, glyph name) entries.

	Entries whose glyph name is None order before entries with a name.
	Within the same kind, entries order by code point and then by glyph
	name. Returns -1, 0 or 1.

	Written without the builtin cmp() so that it also runs where that
	builtin does not exist.
	"""
	uv1, glyphName1 = first
	uv2, glyphName2 = second

	if glyphName1 is None and glyphName2 is not None:
		return -1
	if glyphName2 is None and glyphName1 is not None:
		return 1

	# From here on the names are either both None or both set.
	if uv1 != uv2:
		return -1 if uv1 < uv2 else 1
	if glyphName1 == glyphName2:
		# Also covers the None/None case, which must not reach "<".
		return 0
	return -1 if glyphName1 < glyphName2 else 1
1092
1093
class cmap_format_14(CmapSubtable):
	"""cmap subtable format 14 (variation selector records).

	Decompiled content lives in self.uvsDict, which maps a variation
	selector to a list of (code point, glyph name) entries. A glyph
	name of None marks an entry that came from (or goes to) the
	"default" list, which stores code point ranges without glyph IDs.
	self.cmap is always an empty dict, present only so that generic
	cmap clients do not fail.
	"""

	def decompileHeader(self, data, ttFont):
		"""Parse the 10-byte header and keep the remaining bytes in self.data."""
		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
		self.data = data[10:]
		self.length = length
		self.numVarSelectorRecords = numVarSelectorRecords
		self.ttFont = ttFont
		self.language = 0xFF # has no language.

	def decompile(self, data, ttFont):
		"""Expand the raw records into self.uvsDict.

		Both arguments are None when called through the lazy attribute
		lookup; a direct caller must supply both the subtable data and the
		font.
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data

		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		uvsDict = {}
		recOffset = 0
		for n in range(self.numVarSelectorRecords):
			uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset+11])
			recOffset += 11
			varUVS = cvtToUVS(uvs)
			if defOVSOffset:
				# Offsets count from the subtable start; self.data lacks the 10-byte header.
				startOffset = defOVSOffset - 10
				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset += 4
				for r in range(numValues):
					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
					startOffset += 4
					firstBaseUV = cvtToUVS(uv)
					cnt = addtlCnt + 1
					# Default entries carry no glyph: record them with a None name.
					uvsDict.setdefault(varUVS, []).extend(
							(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt))

			if nonDefUVSOffset:
				startOffset = nonDefUVSOffset - 10
				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset += 4
				localUVList = []
				for r in range(numRecs):
					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
					startOffset += 5
					uv = cvtToUVS(uv)
					glyphName = self.ttFont.getGlyphName(gid)
					localUVList.append( [uv, glyphName] )
				uvsDict.setdefault(varUVS, []).extend(localUVList)

		self.uvsDict = uvsDict

	def toXML(self, writer, ttFont):
		"""Write the header attributes and one <map> element per uvsDict entry.

		Each selector's entry list is sorted in place: entries without a
		glyph name first, then by code point, then by glyph name.
		"""
		def entryOrder(entry):
			uv, gname = entry
			# The leading flag keeps None names from being compared with strings.
			return (gname is not None, uv, gname or "")

		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("length", self.length),
				("numVarSelectorRecords", self.numVarSelectorRecords),
				])
		writer.newline()
		uvsDict = self.uvsDict
		for uvs in sorted(uvsDict.keys()):
			uvList = uvsDict[uvs]
			uvList.sort(key=entryOrder)
			for uv, gname in uvList:
				if gname is None:
					gname = "None"
				# I use the arg rather than the keyword syntax in order to preserve the attribute order.
				writer.simpletag("map", [ ("uvs",hex(uvs)), ("uv",hex(uv)), ("name", gname)]  )
				writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Restore header fields and self.uvsDict from the parsed XML element.

		A name attribute equal to the string "None" is stored as None.
		Non-tuple items and elements with a tag other than "map" are ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.length = safeEval(attrs["length"])
		self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
		self.language = 0xFF # provide a value so that  CmapSubtable.__cmp__() won't fail
		if not hasattr(self, "cmap"):
			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		if not hasattr(self, "uvsDict"):
			self.uvsDict = {}
		# Bind the local unconditionally; it is needed whether or not the dict already existed.
		uvsDict = self.uvsDict

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			uvs = safeEval(childAttrs["uvs"])
			uv = safeEval(childAttrs["uv"])
			gname = childAttrs["name"]
			if gname == "None":
				gname = None
			uvsDict.setdefault(uvs, []).append([uv, gname])

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If undecompiled raw data is still held, it is re-emitted behind a
		rebuilt header. Otherwise self.uvsDict is serialized and
		self.length / self.numVarSelectorRecords are updated to match.
		The result is not cached in self.data, which holds header-less
		bytes everywhere else in this class.
		"""
		if self.data:
			return struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) + self.data

		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		self.numVarSelectorRecords = len(uvsList)
		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
		data = []
		varSelectorRecords = []
		for uvs in uvsList:
			entryList = uvsDict[uvs]

			defList = sorted(uv for uv, gname in entryList if gname is None)
			if defList:
				defOVSOffset = offset

				# Collapse consecutive code points into (start, additionalCount) ranges.
				rangeStart = defList[0]
				cnt = -1
				defRecs = []
				for defEntry in defList:
					cnt += 1
					# Close the range when the sequence breaks, or when the
					# one-byte additional count would exceed 255.
					if (rangeStart + cnt) != defEntry or cnt > 255:
						defRecs.append(struct.pack(">3sB", cvtFromUVS(rangeStart), cnt - 1))
						rangeStart = defEntry
						cnt = 0
				defRecs.append(struct.pack(">3sB", cvtFromUVS(rangeStart), cnt))

				numDefRecs = len(defRecs)
				data.append(struct.pack(">L", numDefRecs))
				data.extend(defRecs)
				offset += 4 + numDefRecs*4
			else:
				defOVSOffset = 0

			# Normalize to tuples so entries built as lists and as tuples sort together.
			ndefList = sorted((uv, gname) for uv, gname in entryList if gname is not None)
			if ndefList:
				nonDefUVSOffset = offset
				numNonDefRecs = len(ndefList)
				data.append(struct.pack(">L", numNonDefRecs))
				offset += 4 + numNonDefRecs*5

				for uv, gname in ndefList:
					gid = ttFont.getGlyphID(gname)
					data.append(struct.pack(">3sH", cvtFromUVS(uv), gid))
			else:
				nonDefUVSOffset = 0

			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
			varSelectorRecords.append(vrec)

		data = b"".join(varSelectorRecords) + b"".join(data)
		self.length = 10 + len(data)
		headerdata = struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords)
		return headerdata + data
1268
1269
class cmap_format_unknown(CmapSubtable):
	"""Fallback for subtable formats that have no dedicated class.

	The subtable bytes are kept untouched in self.data and round-trip
	through XML as a hex dump.
	"""

	def toXML(self, writer, ttFont):
		"""Write the raw bytes as a hex dump inside a cmap_format_<N> element."""
		# The first 12 characters of the class name are "cmap_format_".
		cmapName = self.__class__.__name__[:12] + str(self.format)
		writer.begintag(cmapName, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		writer.dumphex(self.data)
		writer.endtag(cmapName)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the hex dump back into self.data; self.cmap stays empty."""
		self.data = readHex(content)
		self.cmap = {}

	def decompileHeader(self, data, ttFont):
		"""Store the whole subtable, header included, since its layout is unknown."""
		self.language = 0  # dummy value
		self.data = data

	def decompile(self, data, ttFont):
		"""Nothing to expand; only (re)store the raw data when it is supplied."""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# The caller hands us this subtable's own bytes, so they are passed on unsliced.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

	def compile(self, ttFont):
		"""Return the stored raw bytes, or None when there are none."""
		if self.data:
			return self.data
		else:
			return None
1304
# Subtable format number -> class implementing it.  Formats missing from
# this registry are handled by cmap_format_unknown when the cmap table is
# decompiled.
cmap_classes = dict([
		(0, cmap_format_0),
		(2, cmap_format_2),
		(4, cmap_format_4),
		(6, cmap_format_6),
		(12, cmap_format_12),
		(13, cmap_format_13),
		(14, cmap_format_14),
		])
1314