_c_m_a_p.py revision 3a9fd301808f5a8991ca9ac44028d1ecb22d307f
1import sys
2from . import DefaultTable
3import struct
4import array
5import operator
6from fontTools import ttLib
7from fontTools.misc.textTools import safeEval, readHex
8from types import TupleType
9
10
class table__c_m_a_p(DefaultTable.DefaultTable):
	# The 'cmap' table: a table version plus a list of subtables (self.tables).
	# Every subtable object carries a platformID and a platEncID next to its
	# own format-specific data.

	def getcmap(self, platformID, platEncID):
		"""Return the first subtable with the given platformID and platEncID,
		or None when no subtable matches."""
		for subtable in self.tables:
			if (subtable.platformID == platformID and
					subtable.platEncID == platEncID):
				return subtable
		return None # not found

	def decompile(self, data, ttFont):
		"""Read the table header and the subtable records from 'data' and
		fill self.tableVersion and self.tables.

		Only the header of each subtable is decompiled here; the rest is
		decompiled lazily when an attribute of the subtable is referenced.
		Records whose subtable reports a zero length are reported on stdout
		and skipped. Records that point at an offset that was already seen
		share the 'cmap' mapping of the first subtable read from that offset.
		"""
		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
		self.tableVersion = int(tableVersion)
		self.tables = tables = []
		# offset -> first subtable object decompiled from that offset. The
		# object itself is stored (not the record index), because skipped
		# records make record indices and positions in 'tables' diverge.
		seenOffsets = {}
		for i in range(numSubTables):
			# The subtable offset is an unsigned 32-bit field.
			platformID, platEncID, offset = struct.unpack(
					">HHL", data[4+i*8:4+(i+1)*8])
			platformID, platEncID = int(platformID), int(platEncID)
			format, length = struct.unpack(">HH", data[offset:offset+4])
			# Some formats store the length as a 32-bit value, at a
			# different position in the subtable header.
			if format in [8,10,12,13]:
				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
			elif format in [14]:
				format, length = struct.unpack(">HL", data[offset:offset+6])

			if not length:
				sys.stdout.write("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s,  format %s offset %s. Skipping table." % (platformID, platEncID,format, offset) + "\n")
				continue
			if format not in cmap_classes:
				table = cmap_format_unknown(format)
			else:
				table = cmap_classes[format](format)
			table.platformID = platformID
			table.platEncID = platEncID
			# Note that by default we decompile only the subtable header info;
			# any other data gets decompiled only when an attribute of the
			# subtable is referenced.
			table.decompileHeader(data[offset:offset+int(length)], ttFont)
			if offset in seenOffsets:
				table.cmap = seenOffsets[offset].cmap
			else:
				seenOffsets[offset] = table
			tables.append(table)

	def compile(self, ttFont):
		"""Return the binary table: header, one record per subtable, then the
		subtable data. Subtables that share one cmap object, or that compile
		to identical data, are written once and referenced several times."""
		self.tables.sort()    # sort according to the spec; see CmapSubtable.__cmp__()
		numSubTables = len(self.tables)
		totalOffset = 4 + 8 * numSubTables
		data = struct.pack(">HH", self.tableVersion, numSubTables)
		tableData = ""
		seen = {}  # Some tables are the same object reference. Don't compile them twice.
		done = {}  # Some tables are different objects, but compile to the same data chunk
		for table in self.tables:
			try:
				offset = seen[id(table.cmap)]
			except KeyError:
				chunk = table.compile(ttFont)
				if chunk in done:
					offset = done[chunk]
				else:
					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
					tableData = tableData + chunk
			# Unsigned 32-bit offset, matching what decompile() reads.
			data = data + struct.pack(">HHL", table.platformID, table.platEncID, offset)
		return data + tableData

	def toXML(self, writer, ttFont):
		"""Write the table version followed by every subtable."""
		writer.simpletag("tableVersion", version=self.tableVersion)
		writer.newline()
		for table in self.tables:
			table.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		"""Handle one XML element: either the 'tableVersion' tag or a
		'cmap_format_<n>' subtable element; anything else is ignored."""
		if name == "tableVersion":
			self.tableVersion = safeEval(attrs["version"])
			return
		if name[:12] != "cmap_format_":
			return
		if not hasattr(self, "tables"):
			self.tables = []
		format = safeEval(name[12:])
		if format not in cmap_classes:
			table = cmap_format_unknown(format)
		else:
			table = cmap_classes[format](format)
		table.platformID = safeEval(attrs["platformID"])
		table.platEncID = safeEval(attrs["platEncID"])
		table.fromXML(name, attrs, content, ttFont)
		self.tables.append(table)
98
99
class CmapSubtable:
	# Common behaviour of all cmap subtables: header parsing, lazy
	# decompilation of the remaining data, XML output and sort order.

	def __init__(self, format):
		self.format = format
		self.data = None
		self.ttFont = None

	def __getattr__(self, attr):
		"""Decompile the saved subtable data on first access to a missing
		attribute, then look the attribute up again."""
		# Requests for member functions like '__lt__' are never handled here,
		# and without saved data there is nothing to decompile.
		if attr[:2] == '__' or self.data is None:
			raise AttributeError(attr)
		self.decompile(None, None) # use saved data.
		# Once this table has been decompiled, make sure we don't just return
		# the original data. This also avoids recursion when called with an
		# attribute that the cmap subtable doesn't have.
		self.data = None
		return getattr(self, attr)

	def decompileHeader(self, data, ttFont):
		"""Read format, length and language from the first six bytes of
		'data'; keep the remaining bytes and 'ttFont' for later use."""
		header = struct.unpack(">HHH", data[:6])
		format, length, language = header
		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = int(format)
		self.length = int(length)
		self.language = int(language)
		self.data = data[6:]
		self.ttFont = ttFont

	def toXML(self, writer, ttFont):
		"""Write the subtable as an element named after its class, holding
		the mapping entries sorted by character code."""
		tagName = self.__class__.__name__
		tagAttrs = [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("language", self.language),
				]
		writer.begintag(tagName, tagAttrs)
		writer.newline()
		self._writeCodes(sorted(self.cmap.items()), writer)
		writer.endtag(tagName)
		writer.newline()

	def _writeCodes(self, codes, writer):
		"""Write one 'map' tag per (code, name) pair. For the Unicode
		platform/encoding combinations the entry from the Unicode table is
		added as a comment."""
		encoding = (self.platformID, self.platEncID)
		isUnicode = encoding in ((3, 1), (3, 10)) or self.platformID == 0
		if isUnicode:
			from fontTools.unicode import Unicode
		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			if isUnicode:
				writer.comment(Unicode[code])
			writer.newline()

	def __cmp__(self, other):
		if type(self) != type(other):
			return cmp(type(self), type(other))

		# implemented so that list.sort() sorts according to the cmap spec.
		def sortKey(subtable):
			return (
				getattr(subtable, "platformID", None),
				getattr(subtable, "platEncID", None),
				getattr(subtable, "language", None),
				subtable.__dict__)

		return cmp(sortKey(self), sortKey(other))
168
169
class cmap_format_0(CmapSubtable):
	# Format 0 subtable: a 6-byte header followed by an array of 256 one-byte
	# glyph IDs, 262 bytes in total.

	def decompile(self, data, ttFont):
		"""Build self.cmap (char code -> glyph name) from the glyph ID array.

		We usually get here indirectly from the subtable __getattr__
		function, in which case both args must be None and the data saved by
		decompileHeader() is used. Otherwise someone is calling decompile()
		directly and must provide both args; 'data' is then the complete
		subtable, header included.
		"""
		if data != None and ttFont != None:
			# decompileHeader() expects the whole subtable and checks its
			# length against the header itself.
			self.decompileHeader(data, ttFont)
		else:
			assert (data == None and ttFont == None), "Need both data and ttFont arguments"
		data = self.data # decompileHeader assigns the data after the header to self.data
		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
		glyphIdArray = array.array("B")
		glyphIdArray.fromstring(data)
		self.cmap = cmap = {}
		getGlyphName = self.ttFont.getGlyphName
		# The position in the array is the character code.
		for charCode, glyphID in enumerate(glyphIdArray):
			cmap[charCode] = getGlyphName(glyphID)


	def compile(self, ttFont):
		"""Return the binary subtable. Undecompiled data is written back
		unchanged; otherwise self.cmap must map exactly the codes 0..255."""
		if self.data:
			return struct.pack(">HHH", 0, 262, self.language) + self.data

		charCodeList = sorted(self.cmap.items())
		charCodes = [entry[0] for entry in charCodeList]
		assert charCodes == list(range(256))
		valueList = [ttFont.getGlyphID(entry[1]) for entry in charCodeList]

		glyphIdArray = array.array("B", valueList)
		data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring()
		assert len(data) == 262
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every 'map' child element to
		self.cmap; other content is ignored."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap
		for element in content:
			if not isinstance(element, tuple):
				continue
			elementName, elementAttrs, elementContent = element
			if elementName != "map":
				continue
			cmap[safeEval(elementAttrs["code"])] = elementAttrs["name"]
218
219
# struct format used to pack/unpack the four subheader fields, in the order
# firstCode, entryCount, idDelta, idRangeOffset.
subHeaderFormat = ">HHhH"
class SubHeader:
	# Plain record for one format 2 subheader and its glyph index sub-array.
	def __init__(self):
		# The four packed fields stay None until they are filled in.
		self.firstCode = self.entryCount = None
		self.idDelta = self.idRangeOffset = None
		# A fresh list per instance.
		self.glyphIndexArray = []
228
class cmap_format_2(CmapSubtable):
	# Format 2 subtable: mixed one-byte / two-byte character codes, mapped
	# through 256 subHeaderKeys, a list of subheaders and a glyph index array.

	def setIDDelta(self, subHeader):
		"""Choose subHeader.idDelta and rebase the non-zero entries of
		subHeader.glyphIndexArray so that the lowest one becomes 1."""
		subHeader.idDelta = 0
		# find the minGI which is not zero.
		minGI = subHeader.glyphIndexArray[0]
		for gid in subHeader.glyphIndexArray:
			if (gid != 0) and (gid < minGI):
				minGI = gid
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
		# We would like to pick an idDelta such that the first glyphArray GID is 1,
		# so that we are more likely to be able to combine glypharray GID subranges.
		# This means that we have a problem when minGI is > 32K
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid +  idDelta) % 0x10000),
		# we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
		# negative number to an unsigned short.

		if  (minGI > 1):
			if  minGI > 0x7FFF:
				subHeader.idDelta = -(0x10000 - minGI) -1
			else:
				subHeader.idDelta =  minGI -1
			idDelta = subHeader.idDelta
			for i in range(subHeader.entryCount):
				gid = subHeader.glyphIndexArray[i]
				if gid > 0:
					# Reduce modulo 0x10000: with a negative idDelta the plain
					# difference exceeds 0xFFFF and could not be packed as an
					# unsigned short. decompile() applies the same modulo when
					# it adds idDelta back.
					subHeader.glyphIndexArray[i] = (gid - idDelta) % 0x10000


	def decompile(self, data, ttFont):
		"""Build self.cmap (char code -> glyph name) from the subtable data.

		We usually get here indirectly from the subtable __getattr__
		function, in which case both args must be None and the data saved by
		decompileHeader() is used. Otherwise someone is calling decompile()
		directly and must provide both args; 'data' is then the complete
		subtable, header included.
		"""
		if data != None and ttFont != None:
			# decompileHeader() expects the whole subtable and checks its
			# length against the header itself.
			self.decompileHeader(data, ttFont)
		else:
			assert (data == None and ttFont == None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		# get the key array, and determine the number of subHeaders.
		allKeys = array.array("H")
		allKeys.fromstring(data[:512])
		data = data[512:]
		if sys.byteorder != "big":
			allKeys.byteswap()
		# The keys are stored as subheader index * 8.
		subHeaderKeys = [ key//8 for key in allKeys]
		maxSubHeaderindex = max(subHeaderKeys)

		#Load subHeaders
		subHeaderList = []
		pos = 0
		for i in range(maxSubHeaderindex + 1):
			subHeader = SubHeader()
			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
			pos += 8
			# idRangeOffset counts from the idRangeOffset word itself, which
			# is the last two bytes of the subheader just read.
			giDataPos = pos + subHeader.idRangeOffset-2
			giList = array.array("H")
			giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount*2])
			if sys.byteorder != "big":
				giList.byteswap()
			subHeader.glyphIndexArray = giList
			subHeaderList.append(subHeader)
		# How this gets processed.
		# Charcodes may be one or two bytes.
		# The first byte of a charcode is mapped through the  subHeaderKeys, to select
		# a subHeader. For any subheader but 0, the next byte is then mapped through the
		# selected subheader. If subheader Index 0 is selected, then the byte itself is
		# mapped through the subheader, and there is no second byte.
		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
		#
		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
		# The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
		# referenced by another subheader.
		# The only subheader that will be referenced by more than one first-byte value is the subheader
		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
		# A subheader specifies a subrange within (0...256) by the
		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
		# (e.g. glyph not in font).
		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
		# Example for Logocut-Medium
		# first byte of charcode = 129; selects subheader 1.
		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
		# second byte of charCode = 66
		# the index offset = 66-64 = 2.
		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
		# [glyphIndexArray index], [subrange array index] = glyphIndex
		# [256], [0]=1 	from charcode [129, 64]
		# [257], [1]=2  	from charcode [129, 65]
		# [258], [2]=3  	from charcode [129, 66]
		# [259], [3]=4  	from charcode [129, 67]
		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
		# add it to the glyphID to get the final glyphIndex
		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

		self.data = ""
		self.cmap = cmap = {}
		for firstByte in range(256):
			subHeadindex = subHeaderKeys[firstByte]
			subHeader = subHeaderList[subHeadindex]
			if subHeadindex == 0:
				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
					continue # gi is notdef.
				else:
					charCode = firstByte
					offsetIndex = firstByte - subHeader.firstCode
					gi = subHeader.glyphIndexArray[offsetIndex]
					if gi != 0:
						gi = (gi + subHeader.idDelta) % 0x10000
					else:
						continue # gi is notdef.
				cmap[charCode] = gi
			else:
				if subHeader.entryCount:
					charCodeOffset = firstByte * 256 + subHeader.firstCode
					for offsetIndex in range(subHeader.entryCount):
						charCode = charCodeOffset + offsetIndex
						gi = subHeader.glyphIndexArray[offsetIndex]
						if gi != 0:
							gi = (gi + subHeader.idDelta) % 0x10000
						else:
							continue
						cmap[charCode] = gi
				# If not subHeader.entryCount, then all char codes with this first byte are
				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
				# same as mapping it to .notdef.
		# cmap values are GID's; replace them by glyph names.
		glyphOrder = self.ttFont.getGlyphOrder()
		charCodes = list(cmap.keys())
		gids = [cmap[charCode] for charCode in charCodes]
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# At least one GID is outside the glyph order; ask the font for
			# every name instead.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		for charCode, name in zip(charCodes, names):
			cmap[charCode] = name


	def compile(self, ttFont):
		"""Return the binary subtable. Undecompiled data is written back
		unchanged; otherwise the subtable is rebuilt from self.cmap.

		Raises KeyError(name) for a glyph name that cannot be resolved to a
		glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		kEmptyTwoCharCodeRange = -1
		notdefGI = 0

		items = sorted(self.cmap.items())
		charCodes = [item[0] for item in items]
		names = [item[1] for item in items]
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=1)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 2 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								# The name is text read from outside the
								# program: parse it as a number, never
								# evaluate it as code.
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							raise KeyError(name)

					gids.append(gid)

		# Process the (char code to gid) item list  in char code order.
		# By definition, all one byte char codes map to subheader 0.
		# For all the two byte char codes, we assume that the first byte maps to the empty subhead (with an entry count of 0,
		# which defines all char codes in its range to map to notdef) unless proven otherwise.
		# Note that since the char code items are processed in char code order, all the char codes with the
		# same first byte are in sequential order.

		subHeaderKeys = [ kEmptyTwoCharCodeRange for x in  range(256)] # list of indices into subHeaderList.
		subHeaderList = []

		# We force this subheader entry 0  to exist in the subHeaderList in the case where some one comes up
		# with a cmap where all the one byte char codes map to notdef,
		# with the result that the subhead 0 would not get created just by processing the item list.
		charCode = charCodes[0]
		if charCode > 255:
			subHeader = SubHeader()
			subHeader.firstCode = 0
			subHeader.entryCount = 0
			subHeader.idDelta = 0
			subHeader.idRangeOffset = 0
			subHeaderList.append(subHeader)


		lastFirstByte = -1
		items = zip(charCodes, gids)
		for charCode, gid in items:
			if gid == 0:
				continue
			firstbyte = charCode >> 8
			secondByte = charCode & 0x00FF

			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
				if lastFirstByte > -1:
					# fix GI's and iDelta of current subheader.
					self.setIDDelta(subHeader)

					# If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
					# for the indices matching the char codes.
					if lastFirstByte == 0:
						for index in range(subHeader.entryCount):
							oneByteCode = subHeader.firstCode + index
							subHeaderKeys[oneByteCode] = 0

					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
				# init new subheader
				subHeader = SubHeader()
				subHeader.firstCode = secondByte
				subHeader.entryCount = 1
				subHeader.glyphIndexArray.append(gid)
				subHeaderList.append(subHeader)
				subHeaderKeys[firstbyte] = len(subHeaderList) -1
				lastFirstByte = firstbyte
			else:
				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
				for i in range(codeDiff):
					subHeader.glyphIndexArray.append(notdefGI)
				subHeader.glyphIndexArray.append(gid)
				subHeader.entryCount = subHeader.entryCount + codeDiff + 1

		# fix GI's and iDelta of last subheader that we added to the subheader array.
		self.setIDDelta(subHeader)

		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
		subHeader = SubHeader()
		subHeader.firstCode = 0
		subHeader.entryCount = 0
		subHeader.idDelta = 0
		subHeader.idRangeOffset = 2
		subHeaderList.append(subHeader)
		emptySubheadIndex = len(subHeaderList) - 1
		for index in range(256):
			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
				subHeaderKeys[index] = emptySubheadIndex
		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
		# charcode 0 and GID 0.

		idRangeOffset = (len(subHeaderList)-1)*8  + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
		for index in range(subheadRangeLen):
			subHeader = subHeaderList[index]
			subHeader.idRangeOffset = 0
			for j  in range(index):
				prevSubhead = subHeaderList[j]
				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
					subHeader.glyphIndexArray = []
					break
			if subHeader.idRangeOffset == 0: # didn't find one.
				subHeader.idRangeOffset = idRangeOffset
				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
			else:
				idRangeOffset = idRangeOffset - 8  # one less subheader

		# Now we can write out the data!
		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
		for subhead in 	subHeaderList[:-1]:
			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
		dataList = [struct.pack(">HHH", 2, length, self.language)]
		for index in subHeaderKeys:
			dataList.append(struct.pack(">H", index*8))
		for subhead in 	subHeaderList:
			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
		for subhead in 	subHeaderList[:-1]:
			for gi in subhead.glyphIndexArray:
				dataList.append(struct.pack(">H", gi))
		data = "".join(dataList)
		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
		return data


	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every 'map' child element to
		self.cmap; other content is ignored."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			elementName, elementAttrs, elementContent = element
			if elementName != "map":
				continue
			cmap[safeEval(elementAttrs["code"])] = elementAttrs["name"]
538
539
# struct format of the fixed part of a format 4 subtable, seven unsigned
# shorts. cmap_format_4.compile() packs them in this order: format, length,
# language, segCountX2, searchRange, entrySelector, rangeShift.
cmap_format_4_format = ">7H"

# Layout of the variable part that follows the fixed header:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
548
def splitRange(startCode, endCode, cmap):
	"""Split the code range startCode..endCode into cmap4 segments.

	'cmap' maps every code in the range to a glyph ID. Runs of consecutive
	glyph IDs that are long enough to pay for their own segment are split
	off. Returns (start, end): 'end' holds the end code of every resulting
	segment, 'start' the start code of every segment but the first (the
	caller already knows that one). No proof of optimality, but it does well
	in practice: tested fonts never became bigger, many became smaller.
	"""
	if startCode == endCode:
		return [], [endCode]

	# Pass 1: collect the runs in which the glyph IDs are consecutive.
	runs = []
	runStart = None  # first code of the run being tracked, None outside a run
	prevCode = startCode
	prevGID = cmap[startCode]
	for code in range(startCode + 1, endCode + 1):
		gid = cmap[code]
		if gid - 1 == prevGID:
			if runStart is None:
				runStart = prevCode
		elif runStart is not None:
			runs.append((runStart, prevCode))
			runStart = None
		prevCode, prevGID = code, gid
	if runStart is not None:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Pass 2: drop the runs that would only make the data bigger. A new
	# segment costs 8 bytes, not using a new segment costs 2 bytes per
	# character.
	kept = []
	for first, last in runs:
		if first == startCode and last == endCode:
			break  # the whole range, we're fine
		if first == startCode or last == endCode:
			minimumLength = 4  # split costs one more segment
		else:
			minimumLength = 8  # split costs two more segments
		if last - first + 1 > minimumLength:
			kept.append((first, last))

	if not kept:
		return [], [endCode]

	# Pass 3: lay out the full list of segments, putting a filler segment
	# (glyph IDs _not_ consecutive) before, between and after the kept runs
	# wherever codes would otherwise be left uncovered.
	segments = []
	nextCode = startCode
	for first, last in kept:
		if first != nextCode:
			segments.append((nextCode, first - 1))
		segments.append((first, last))
		nextCode = last + 1
	if nextCode != endCode + 1:
		segments.append((nextCode, endCode))

	# Transform the segments into startCode/endCode lists; the start of the
	# first segment is left out.
	start = [first for first, last in segments[1:]]
	end = [last for first, last in segments]

	assert len(start) + 1 == len(end)
	return start, end
626
627
class cmap_format_4(CmapSubtable):
	# Format 4 subtable: two-byte character codes stored as segments of
	# consecutive codes (endCode/startCode/idDelta/idRangeOffset arrays plus
	# a glyph index array).

	def decompile(self, data, ttFont):
		"""Build self.cmap (char code -> glyph name) from the subtable data.

		We usually get here indirectly from the subtable __getattr__
		function, in which case both args must be None and the data saved by
		decompileHeader() is used. Otherwise someone is calling decompile()
		directly and must provide both args; 'data' is then the complete
		subtable, header included.
		"""
		if data != None and ttFont != None:
			# decompileHeader() expects the whole subtable and checks its
			# length against the header itself.
			self.decompileHeader(data, ttFont)
		else:
			assert (data == None and ttFont == None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		(segCountX2, searchRange, entrySelector, rangeShift) = \
					struct.unpack(">4H", data[:8])
		data = data[8:]
		segCount = segCountX2 // 2

		allCodes = array.array("H")
		allCodes.fromstring(data)
		self.data = data = None

		if sys.byteorder != "big":
			allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]
		lenGIArray = len(glyphIndexArray)

		# build 2-byte character mapping
		charCodes = []
		gids = []
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			rangeCharCodes = range(startCode[i], endCode[i] + 1)
			charCodes.extend(rangeCharCodes)
			for charCode in rangeCharCodes:
				rangeOffset = idRangeOffset[i]
				if rangeOffset == 0:
					glyphID = charCode + idDelta[i]
				else:
					# rangeOffset is a byte offset counted from the
					# idRangeOffset[i] word itself. Turn it into an index
					# into glyphIndexArray, which starts right after the
					# idRangeOffset array.
					index = rangeOffset // 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array  is not less than the length of the array (%d) !" % (i, index, lenGIArray)
					if glyphIndexArray[index] != 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + idDelta[i]
					else:
						glyphID = 0  # missing glyph
				gids.append(glyphID % 0x10000)

		self.cmap = cmap = {}
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# At least one GID is outside the glyph order; ask the font for
			# every name instead.
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		for charCode, name in zip(charCodes, names):
			cmap[charCode] = name



	def setIDDelta(self, idDelta):
		"""Return 'idDelta' wrapped into the range of a signed short."""
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K
		# startCode can be between 0 and 64K-1, and the first glyph index can be between 1 and 64K-1
		# This means that we have a problem because we can need to assign to idDelta values
		# between -(64K-2) and 64K -1.
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid +  idDelta) % 0x10000),
		# we can get from a startCode of 0 to a final GID of 64 -1K by subtracting 1, and casting the
		# negative number to an unsigned short.
		# Similarly , we can get from a startCode of 64K-1 to a final GID of 1 by adding 2, because of
		# the modulo arithmetic.

		if idDelta > 0x7FFF:
			idDelta = idDelta - 0x10000
		elif idDelta <  -0x8000:
			# -0x8000 itself is a valid short and must stay as it is:
			# wrapping it would give +0x8000, which does not fit.
			idDelta = idDelta + 0x10000

		return idDelta


	def compile(self, ttFont):
		"""Return the binary subtable. Undecompiled data is written back
		unchanged; otherwise the subtable is rebuilt from self.cmap.

		Raises KeyError(name) for a glyph name that cannot be resolved to a
		glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		from fontTools.ttLib.sfnt import maxPowerOfTwo

		charCodes = sorted(self.cmap.keys())
		if not charCodes:
			startCode = [0xffff]
			endCode = [0xffff]
		else:
			names = [self.cmap[charCode] for charCode in charCodes]
			nameMap = ttFont.getReverseGlyphMap()
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				nameMap = ttFont.getReverseGlyphMap(rebuild=1)
				try:
					gids = [nameMap[name] for name in names]
				except KeyError:
					# allow virtual GIDs in format 4 tables
					gids = []
					for name in names:
						try:
							gid = nameMap[name]
						except KeyError:
							try:
								if (name[:3] == 'gid'):
									# The name is text read from outside the
									# program: parse it as a number, never
									# evaluate it as code.
									gid = int(name[3:])
								else:
									gid = ttFont.getGlyphID(name)
							except Exception:
								raise KeyError(name)

						gids.append(gid)
			cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

			# Build startCode and endCode lists.
			# Split the char codes in ranges of consecutive char codes, then split
			# each range in more ranges of consecutive/not consecutive glyph IDs.
			# See splitRange().
			lastCode = charCodes[0]
			endCode = []
			startCode = [lastCode]
			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
				if charCode == lastCode + 1:
					lastCode = charCode
					continue
				start, end = splitRange(startCode[-1], lastCode, cmap)
				startCode.extend(start)
				endCode.extend(end)
				startCode.append(charCode)
				lastCode = charCode
			endCode.append(lastCode)
			startCode.append(0xffff)
			endCode.append(0xffff)

		# Build idDelta, idRangeOffset and glyphIndexArray for every segment.
		# With an empty cmap the loop below does not run, so 'cmap' is never
		# looked at in that case.
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []
		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if  (indices == list(range(indices[0], indices[0] + len(indices)))):
				# Consecutive glyph IDs: a delta is all that is needed.
				idDeltaTemp = self.setIDDelta(indices[0] - startCode[i])
				idDelta.append( idDeltaTemp)
				idRangeOffset.append(0)
			else:
				# Otherwise the glyph IDs go into glyphIndexArray, and
				# idRangeOffset is the byte offset from this segment's
				# idRangeOffset word to its first entry there.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Binary search parameters stored in the subtable header.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		maxExponent = maxPowerOfTwo(segCount)
		searchRange = 2 * (2 ** maxExponent)
		entrySelector = maxExponent
		rangeShift = 2 * segCount - searchRange

		charCodeArray = array.array("H", endCode + [0] + startCode)
		idDeltaeArray = array.array("h", idDelta)
		restArray = array.array("H", idRangeOffset + glyphIndexArray)
		if sys.byteorder != "big":
			charCodeArray.byteswap()
			idDeltaeArray.byteswap()
			restArray.byteswap()
		data = charCodeArray.tostring() + idDeltaeArray.tostring() + restArray.tostring()

		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.language,
				segCountX2, searchRange, entrySelector, rangeShift)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every 'map' child element to
		self.cmap; any other child element fails an assertion."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			nameMap, attrsMap, dummyContent = element
			if nameMap != "map":
				assert 0, "Unrecognized keyword in cmap subtable"
			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
830
831
class cmap_format_6(CmapSubtable):
	"""cmap subtable format 6.

	The data following the subtable header holds firstCode, entryCount and
	then one 16-bit glyph index per character code, for the contiguous run
	of codes that starts at firstCode.
	"""

	def decompile(self, data, ttFont):
		"""Build self.cmap (character code -> glyph name) from the raw data.

		Either both arguments are None, in which case the data and font saved
		by decompileHeader() are used, or both are given and the header is
		read from them first. The raw data is dropped (self.data = None) once
		it has been parsed.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# There is no offset/length to slice by at this level; the data
			# is handed to decompileHeader() as given.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		firstCode, entryCount = struct.unpack(">HH", data[:4])
		firstCode = int(firstCode)
		data = data[4:]
		#assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
		glyphIndexArray = array.array("H")
		glyphIndexArray.fromstring(data[:2 * int(entryCount)])
		if sys.byteorder != "big":
			# the file stores big-endian values; array uses host order
			glyphIndexArray.byteswap()
		self.data = data = None

		self.cmap = cmap = {}

		lenArray = len(glyphIndexArray)
		charCodes = range(firstCode, firstCode + lenArray)
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in glyphIndexArray]
		except IndexError:
			# some glyph index lies outside the glyph order; let the font
			# object come up with a name for each index instead
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in glyphIndexArray]
		cmap.update(zip(charCodes, names))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		While raw data is still held (self.data is truthy) it is returned
		behind a freshly packed format/length/language header. Otherwise the
		subtable is rebuilt from self.cmap: every code from the lowest to the
		highest mapped code gets an entry, and codes without a mapping are
		written as the glyph ID of ".notdef".
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		cmap = self.cmap
		# dict order is arbitrary; the first/last code must come from a sorted list
		codes = sorted(cmap.keys())
		if codes: # yes, there are empty cmap tables.
			codes = list(range(codes[0], codes[-1] + 1))
			firstCode = codes[0]
			valueList = [ttFont.getGlyphID(cmap.get(code, ".notdef")) for code in codes]
			glyphIndexArray = array.array("H", valueList)
			if sys.byteorder != "big":
				glyphIndexArray.byteswap()
			data = glyphIndexArray.tostring()
		else:
			data = ""
			firstCode = 0
		# 10 == size of the five 16-bit header fields packed below
		header = struct.pack(">HHHHH",
				6, len(data) + 10, self.language, firstCode, len(codes))
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Fill this subtable from its parsed XML element.

		Stores the evaluated "language" attribute and adds one self.cmap
		entry per <map> child (evaluated "code" -> "name"). Items in
		`content` that are not tuples, or are not "map" elements, are ignored.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if type(element) != TupleType:
				continue
			childName, childAttrs, childContent = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
899
900
class cmap_format_12_or_13(CmapSubtable):
	"""Shared implementation of cmap subtable formats 12 and 13.

	Both formats store groups of (startCharCode, endCharCode, glyphID) as
	three 32-bit values behind a 16-byte header. Subclasses supply
	self._format_step, _computeGIDs() and _IsInSameRun(), which define how
	glyph IDs relate to the character codes inside one group.
	"""

	def __init__(self, format):
		self.format = format
		self.reserved = 0
		self.data = None
		self.ttFont = None

	def decompileHeader(self, data, ttFont):
		"""Parse the 16-byte header and keep the remaining bytes in self.data.

		Asserts that the length of `data`, the length field and the group
		count agree with each other.
		"""
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups
		self.data = data[16:]
		self.ttFont = ttFont

	def decompile(self, data, ttFont):
		"""Build self.cmap (character code -> glyph name) from the group data.

		Either both arguments are None, in which case the data and font saved
		by decompileHeader() are used, or both are given and the header is
		read from them first. The raw data is dropped (self.data = None) once
		it has been parsed.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# There is no offset/length to slice by at this level; the data
			# is handed to decompileHeader() as given.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		charCodes = []
		gids = []
		pos = 0
		for i in range(self.nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
			pos += 12
			lenGroup = 1 + endCharCode - startCharCode
			charCodes.extend(range(startCharCode, endCharCode + 1))
			# how a group's glyph IDs are derived differs between format 12 and 13
			gids.extend(self._computeGIDs(glyphID, lenGroup))
		self.data = data = None
		self.cmap = cmap = {}
		glyphOrder = self.ttFont.getGlyphOrder()
		try:
			names = [glyphOrder[gid] for gid in gids]
		except IndexError:
			# some glyph ID lies outside the glyph order; let the font object
			# come up with a name for each ID instead
			getGlyphName = self.ttFont.getGlyphName
			names = [getGlyphName(gid) for gid in gids]
		cmap.update(zip(charCodes, names))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		While raw data is still held (self.data is truthy) it is returned
		behind a freshly packed header. Otherwise self.cmap is converted to
		glyph IDs and written as groups of consecutive character codes, a new
		group starting wherever _IsInSameRun() reports a break. An empty cmap
		yields a header with zero groups.

		Raises KeyError(name) for a glyph name that cannot be resolved.
		"""
		if self.data:
			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
		charCodes = list(self.cmap.keys())
		names = list(self.cmap.values())
		if not charCodes:
			# nothing to group: emit the bare 16-byte header with nGroups == 0
			return struct.pack(">HHLLL", self.format, self.reserved, 16, self.language, 0)
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=1)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 12 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								# NOTE(review): eval() runs whatever follows "gid" in a
								# glyph name, and fromXML fills the cmap straight from
								# XML attributes. int() would be safer but rejects
								# forms (e.g. hex literals) that eval accepts today.
								gid = eval(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							raise KeyError(name)

					gids.append(gid)

		cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

		charCodes.sort()
		startCharCode = charCodes[0]
		startGlyphID = cmap[startCharCode]
		# Seed the "previous" values so that the first code counts as
		# continuing a run instead of opening a second group.
		lastGlyphID = startGlyphID - self._format_step
		lastCharCode = startCharCode - 1
		nGroups = 0
		dataList = []
		for charCode in charCodes:
			glyphID = cmap[charCode]
			if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
				dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
				startCharCode = charCode
				startGlyphID = glyphID
				nGroups = nGroups + 1
			lastGlyphID = glyphID
			lastCharCode = charCode
		# close the group that was still open when the codes ran out
		dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
		nGroups = nGroups + 1
		data = "".join(dataList)
		lengthSubtable = len(data) + 16
		assert len(data) == (nGroups*12) == (lengthSubtable-16)
		return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

	def toXML(self, writer, ttFont):
		"""Write the subtable as an XML element named after the class.

		The header fields become attributes; the mappings, sorted by
		character code, are written by _writeCodes().
		"""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Fill this subtable from its parsed XML element.

		Stores the evaluated header attributes and adds one self.cmap entry
		per <map> child (evaluated "code" -> "name"). Items in `content` that
		are not tuples, or are not "map" elements, are ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if type(element) != TupleType:
				continue
			childName, childAttrs, childContent = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
1042
1043
class cmap_format_12(cmap_format_12_or_13):
	"""Format 12 flavour: within a group, glyph IDs advance together with the character codes."""

	def __init__(self, format):
		cmap_format_12_or_13.__init__(self, format)
		# glyph ID difference between neighbouring codes of one group
		self._format_step = 1

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return the numberOfGlyphs consecutive glyph IDs beginning at startingGlyph."""
		stop = startingGlyph + numberOfGlyphs
		return range(startingGlyph, stop)

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether both the glyph ID and the char code directly follow the previous pair."""
		glyphFollows = glyphID == 1 + lastGlyphID
		codeFollows = charCode == 1 + lastCharCode
		return glyphFollows and codeFollows
1054
1055
class cmap_format_13(cmap_format_12_or_13):
	"""Format 13 flavour: every character code of a group maps to the same glyph ID."""

	def __init__(self, format):
		cmap_format_12_or_13.__init__(self, format)
		# glyph ID difference between neighbouring codes of one group
		self._format_step = 0

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return startingGlyph repeated numberOfGlyphs times."""
		repeated = [startingGlyph]
		return repeated * numberOfGlyphs

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether the glyph ID is unchanged and the char code directly follows the previous one."""
		sameGlyph = glyphID == lastGlyphID
		codeFollows = charCode == 1 + lastCharCode
		return sameGlyph and codeFollows
1066
1067
def cvtToUVS(threeByteString):
	"""Convert a 3-byte big-endian value to an integer.

	threeByteString -- exactly three bytes, most significant byte first.
	Returns the value as an unsigned integer.
	Raises struct.error when the input is not three bytes long.
	"""
	# The unpack format is explicitly big-endian, so the padding byte always
	# goes in front; the host's byte order plays no part in this.
	val, = struct.unpack(">L", b"\0" + threeByteString)
	return val
1075
def cvtFromUVS(val):
	"""Convert an integer to its 3-byte big-endian representation.

	val -- unsigned integer; only its low 24 bits end up in the result.
	Returns three bytes, most significant byte first.
	Raises struct.error when val does not fit an unsigned 32-bit field.
	"""
	# The pack format is explicitly big-endian, so the byte to drop is always
	# the leading (most significant) one; the host's byte order plays no part.
	return struct.pack(">L", val)[1:]
1082
def cmpUVSListEntry(first, second):
	"""Comparison function for (unicode value, glyph name) pairs.

	A pair whose glyph name is None sorts ahead of a pair that has a name.
	When both or neither have a name, pairs are ordered by unicode value
	and then by glyph name. Returns a negative number, zero or a positive
	number in the manner of cmp().
	"""
	uv1, glyphName1 = first
	uv2, glyphName2 = second

	firstIsUnnamed = glyphName1 is None
	secondIsUnnamed = glyphName2 is None
	if firstIsUnnamed != secondIsUnnamed:
		# exactly one side lacks a glyph name; that side goes first
		if firstIsUnnamed:
			return -1
		return 1

	return cmp(uv1, uv2) or cmp(glyphName1, glyphName2)
1096
1097
class cmap_format_14(CmapSubtable):
	"""cmap subtable format 14 (Unicode variation sequences).

	The mappings live in self.uvsDict, keyed by variation selector. Each
	value is a list of (unicode value, glyph name) entries; an entry whose
	glyph name is None belongs to the default-UVS ranges, an entry with a
	name to the non-default mappings. self.cmap is kept as an empty dict for
	clients that expect every subtable to have one.
	"""

	def decompileHeader(self, data, ttFont):
		"""Parse the 10-byte header and keep the remaining bytes in self.data."""
		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
		self.data = data[10:]
		self.length = length
		self.numVarSelectorRecords = numVarSelectorRecords
		self.ttFont = ttFont
		self.language = 0xFF # has no language.

	def decompile(self, data, ttFont):
		"""Build self.uvsDict from the variation selector records.

		Either both arguments are None, in which case the data and font saved
		by decompileHeader() are used, or both are given and the header is
		read from them first.
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data

		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		uvsDict = {}
		recOffset = 0
		for n in range(self.numVarSelectorRecords):
			uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset+11])
			recOffset += 11
			varUVS = cvtToUVS(uvs)
			if defOVSOffset:
				# offsets count from the start of the subtable, while `data`
				# begins after the 10-byte header
				startOffset = defOVSOffset - 10
				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset += 4
				for r in range(numValues):
					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
					startOffset += 4
					firstBaseUV = cvtToUVS(uv)
					cnt = addtlCnt + 1
					# default entries carry no glyph name
					localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt)]
					uvsDict.setdefault(varUVS, []).extend(localUVList)

			if nonDefUVSOffset:
				startOffset = nonDefUVSOffset - 10
				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset += 4
				localUVList = []
				for r in range(numRecs):
					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
					startOffset += 5
					uv = cvtToUVS(uv)
					glyphName = self.ttFont.getGlyphName(gid)
					localUVList.append( [uv, glyphName] )
				uvsDict.setdefault(varUVS, []).extend(localUVList)

		self.uvsDict = uvsDict

	def toXML(self, writer, ttFont):
		"""Write the subtable as an XML element named after the class.

		One <map> element is written per entry, ordered by variation selector
		and, within a selector, by cmpUVSListEntry. A glyph name of None is
		written as the string "None". The entry lists in self.uvsDict are
		sorted in place.
		"""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("length", self.length),
				("numVarSelectorRecords", self.numVarSelectorRecords),
				])
		writer.newline()
		uvsDict = self.uvsDict
		for uvs in sorted(uvsDict.keys()):
			uvList = uvsDict[uvs]
			uvList.sort(cmpUVSListEntry)
			for uv, gname in uvList:
				if gname is None:
					gname = "None"
				# I use the arg rather than the keyword syntax in order to preserve the attribute order.
				writer.simpletag("map", [ ("uvs",hex(uvs)), ("uv",hex(uv)), ("name", gname)]  )
				writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Fill this subtable from its parsed XML element.

		Stores the evaluated header attributes and appends one [uv, name]
		entry to self.uvsDict[uvs] per <map> child; the name "None" is turned
		back into None. Items in `content` that are not tuples, or are not
		"map" elements, are ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.length = safeEval(attrs["length"])
		self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
		self.language = 0xFF # provide a value so that  CmapSubtable.__cmp__() won't fail
		if not hasattr(self, "cmap"):
			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		if not hasattr(self, "uvsDict"):
			self.uvsDict = {}
		# bound unconditionally, so an already existing uvsDict is extended
		uvsDict = self.uvsDict

		for element in content:
			if type(element) != TupleType:
				continue
			childName, childAttrs, childContent = element
			if childName != "map":
				continue
			uvs = safeEval(childAttrs["uvs"])
			uv = safeEval(childAttrs["uv"])
			gname = childAttrs["name"]
			if gname == "None":
				gname = None
			uvsDict.setdefault(uvs, []).append( [uv, gname] )

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		While raw data is still held (self.data is truthy) it is returned
		behind a freshly packed header. Otherwise the subtable is rebuilt
		from self.uvsDict, and self.numVarSelectorRecords and self.length
		are updated to match. The compiled bytes are not stored in
		self.data, so compiling again gives the same result.
		"""
		if self.data:
			return struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) + self.data

		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		self.numVarSelectorRecords = len(uvsList)
		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
		chunks = []
		varSelectorRecords = []
		for uvs in uvsList:
			entryList = uvsDict[uvs]

			# entries without a glyph name make up the default-UVS ranges
			defList = sorted([entry[0] for entry in entryList if entry[1] is None])
			if defList:
				defOVSOffset = offset

				lastUV = defList[0]
				cnt = -1
				defRecs = []
				for defEntry in defList:
					cnt += 1
					# Close the range at a gap, and also once it holds 256
					# values: the additional count is packed into one byte.
					if (lastUV+cnt) != defEntry or cnt > 255:
						rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1)
						lastUV = defEntry
						defRecs.append(rec)
						cnt = 0

				rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
				defRecs.append(rec)

				numDefRecs = len(defRecs)
				chunks.append(struct.pack(">L", numDefRecs))
				chunks.extend(defRecs)
				offset += 4 + numDefRecs*4
			else:
				defOVSOffset = 0

			# entries with a glyph name make up the non-default mappings
			ndefList = sorted([entry for entry in entryList if entry[1] is not None])
			if ndefList:
				nonDefUVSOffset = offset
				numNonDefRecs = len(ndefList)
				chunks.append(struct.pack(">L", numNonDefRecs))
				offset += 4 + numNonDefRecs*5

				for uv, gname in ndefList:
					gid = ttFont.getGlyphID(gname)
					ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
					chunks.append(ndrec)
			else:
				nonDefUVSOffset = 0

			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
			varSelectorRecords.append(vrec)

		data = "".join(varSelectorRecords) + "".join(chunks)
		self.length = 10 + len(data)
		headerdata = struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords)
		# Deliberately not cached in self.data: the early return above puts a
		# header in front of self.data, which would then appear twice.
		return headerdata + data
1274
1275
class cmap_format_unknown(CmapSubtable):
	"""Fallback for subtable formats that have no dedicated class.

	The subtable bytes are kept unparsed in self.data and written back
	unchanged on compile.
	"""

	def toXML(self, writer, ttFont):
		"""Write the raw subtable bytes as a hex dump inside a cmap_format_<format> element."""
		# the first 12 characters of the class name are "cmap_format_"
		cmapName = self.__class__.__name__[:12] + str(self.format)
		writer.begintag(cmapName, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		writer.dumphex(self.data)
		writer.endtag(cmapName)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the hex dump back into self.data; self.cmap is left empty."""
		self.data = readHex(content)
		self.cmap = {}

	def decompileHeader(self, data, ttFont):
		"""Keep all of `data` in self.data; nothing is parsed."""
		self.language = 0  # dummy value
		self.data = data

	def decompile(self, data, ttFont):
		"""Check the arguments; when both are given, store the data via decompileHeader().

		Either both arguments must be None or both must be given.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			# There is no offset/length to slice by at this level; the data
			# is handed to decompileHeader() as given.
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

	def compile(self, ttFont):
		"""Return the stored raw data, or None when there is none."""
		if self.data:
			return self.data
		else:
			return None
1310
# Subtable format number -> class implementing that format. Formats that are
# missing here are handled by cmap_format_unknown in table__c_m_a_p.decompile().
cmap_classes = dict([
		(0, cmap_format_0),
		(2, cmap_format_2),
		(4, cmap_format_4),
		(6, cmap_format_6),
		(12, cmap_format_12),
		(13, cmap_format_13),
		(14, cmap_format_14),
		])
1320