cffLib.py revision 15aae2e2ec6984ddc70c460625f5fe0bccb48a01
1"""cffLib.py -- read/write tools for Adobe CFF fonts."""
2
3__version__ = "$Id: cffLib.py,v 1.2 1999-12-20 21:59:49 Just Exp $"
4__author__ = "jvr"
5
6import struct, sstruct
7import string
8import types
9import psCharStrings
10
11
# sstruct format for the CFF header. CFFFontSet.decompile() unpacks the first
# four bytes of the CFF data with it and stores the four fields (major, minor,
# hdrSize, offSize) as attributes on the font set.
# NOTE(review): "B" presumably means one unsigned byte, as in the struct
# module -- confirm against sstruct.
cffHeaderFormat = """
	major:   B
	minor:   B
	hdrSize: B
	offSize: B
"""
18
class CFFFontSet:

	"""All fonts found in one block of CFF data.

	Attributes set by decompile():
		major, minor, hdrSize, offSize -- the header fields
		fontNames -- the font names, in file order
		fonts -- dict mapping each font name to its CFFFont object
		GlobalSubrs -- list of T2CharString objects, shared by all fonts
	"""

	def __init__(self):
		self.fonts = {}

	def decompile(self, data):
		"""Parse the raw CFF 'data' and fill in this font set.

		Raises AssertionError when the header does not say version 1.0.
		"""
		sstruct.unpack(cffHeaderFormat, data[:4], self)
		assert self.major == 1 and self.minor == 0, \
				"unknown CFF format: %d.%d" % (self.major, self.minor)
		restdata = data[self.hdrSize:]

		# The header is followed by four consecutive INDEX structures:
		# font names, top dicts, strings and global subroutines.
		self.fontNames, restdata = readINDEX(restdata)
		topDicts, restdata = readINDEX(restdata)
		strings, restdata = readINDEX(restdata)
		strings = IndexedStrings(strings)
		globalSubrs, restdata = readINDEX(restdata)
		# Build a real list: toXML() calls len() on it and indexes into it.
		self.GlobalSubrs = [psCharStrings.T2CharString(subr) for subr in globalSubrs]

		for i in range(len(topDicts)):
			font = self.fonts[self.fontNames[i]] = CFFFont()
			font.GlobalSubrs = self.GlobalSubrs  # Hmm.
			# The font gets the complete data: it slices it with the offsets
			# found in its top dict.
			font.decompile(data, topDicts[i], strings, self)  # maybe only 'on demand'?

	def compile(self):
		"""Not implemented yet; always raises NotImplementedError."""
		raise NotImplementedError("CFFFontSet.compile() is not implemented yet")

	def toXML(self, xmlWriter, progress=None):
		"""Dump every font, followed by the global subroutines, to 'xmlWriter'.

		'progress' is handed on unchanged to each font's toXML() method.
		"""
		xmlWriter.newline()
		for fontName in self.fontNames:
			xmlWriter.begintag("CFFFont", name=fontName)
			xmlWriter.newline()
			self.fonts[fontName].toXML(xmlWriter, progress)
			xmlWriter.endtag("CFFFont")
			xmlWriter.newline()
		xmlWriter.newline()
		xmlWriter.begintag("GlobalSubrs")
		xmlWriter.newline()
		for i in range(len(self.GlobalSubrs)):
			xmlWriter.newline()
			xmlWriter.begintag("CharString", id=i)
			xmlWriter.newline()
			self.GlobalSubrs[i].toXML(xmlWriter)
			xmlWriter.endtag("CharString")
			xmlWriter.newline()
		xmlWriter.newline()
		xmlWriter.endtag("GlobalSubrs")
		xmlWriter.newline()
		xmlWriter.newline()

	def fromXML(self, element):
		"""Not implemented yet; always raises NotImplementedError.

		'element' is a (name, attrs, content) tuple.
		"""
		# Unpack first so a malformed argument is still rejected as before.
		name, attrs, content = element
		raise NotImplementedError("CFFFontSet.fromXML() is not implemented yet")
73
74
class IndexedStrings:

	"""Two-way mapping between string IDs (SIDs) and strings.

	SIDs below cffStandardStringCount refer to the predefined
	cffStandardStrings table; higher SIDs index into 'strings', the list of
	custom strings passed to the constructor (and extended by getSID()).
	"""

	def __init__(self, strings=None):
		if strings is None:
			strings = []
		self.strings = strings

	def __getitem__(self, SID):
		"""Return the string for 'SID'."""
		if SID < cffStandardStringCount:
			return cffStandardStrings[SID]
		else:
			return self.strings[SID - cffStandardStringCount]

	def getSID(self, s):
		"""Return the SID for string 's', adding 's' to the custom strings
		when it is neither a standard string nor already present.
		"""
		if not hasattr(self, "stringMapping"):
			self.buildStringMapping()
		if s in cffStandardStringMapping:
			SID = cffStandardStringMapping[s]
		elif s in self.stringMapping:
			# Must be 'elif': with a plain 'if' a standard string would fall
			# through to the 'else' branch below and be appended as a custom
			# string, replacing its standard SID with a new one.
			SID = self.stringMapping[s]
		else:
			SID = len(self.strings) + cffStandardStringCount
			self.strings.append(s)
			self.stringMapping[s] = SID
		return SID

	def getStrings(self):
		"""Return the list of custom strings (not a copy)."""
		return self.strings

	def buildStringMapping(self):
		"""(Re)build the reverse mapping from custom string to SID."""
		self.stringMapping = {}
		for index in range(len(self.strings)):
			self.stringMapping[self.strings[index]] = index + cffStandardStringCount
108
109
class CFFFont:

	"""A single font from a CFF font set.

	decompile() stores the decoded top dict entries as instance attributes;
	names that are not set on the instance are looked up in 'defaults' by
	__getattr__(). After decompile(), 'Private' is a PrivateDict, 'charset'
	is the list of glyph names in glyph order and 'CharStrings' is a dict
	mapping glyph names to charstring objects.
	"""

	defaults = psCharStrings.topDictDefaults

	def __init__(self):
		pass

	def __getattr__(self, attr):
		# Only called when normal attribute lookup fails: fall back on the
		# top dict defaults.
		try:
			return self.defaults[attr]
		except KeyError:
			raise AttributeError(attr)

	def fromDict(self, dict):
		"""Set every item of 'dict' as an instance attribute."""
		self.__dict__.update(dict)

	def decompile(self, data, topDictData, strings, fontSet):
		"""Decompile this font.

		data -- the complete CFF data; the offsets from the top dict are
			applied to it
		topDictData -- the raw top dict of this font
		strings -- object mapping SIDs to strings
		fontSet -- the owning font set (currently unused)

		Raises NotImplementedError for predefined charsets, unknown charset
		formats and custom encodings, none of which are supported yet.
		"""
		top = psCharStrings.TopDictDecompiler(strings)
		top.decompile(topDictData)
		self.fromDict(top.getDict())

		# The top dict holds Private as a (size, offset) pair; replace it
		# with the decompiled PrivateDict.
		size, offset = self.Private
		privateData = data[offset:offset+size]
		self.Private = PrivateDict()
		# PrivateDict.decompile() gets the data from the start of the private
		# dict onwards because it locates the local subrs relative to it.
		self.Private.decompile(data[offset:], privateData, strings)

		# The top dict holds CharStrings as the offset of an INDEX.
		rawCharStrings = readINDEX(data[self.CharStrings:])[0]
		nGlyphs = len(rawCharStrings)

		# get charset (or rather: get glyphNames)
		charSet = self._getGlyphNames(data, nGlyphs, strings)
		self.charset = charSet

		assert len(charSet) == nGlyphs
		self.CharStrings = charStrings = {}
		if self.CharstringType == 2:
			# Type 2 CharStrings
			charStringClass = psCharStrings.T2CharString
		else:
			# Type 1 CharStrings
			charStringClass = psCharStrings.T1CharString
		for i in range(nGlyphs):
			charStrings[charSet[i]] = charStringClass(rawCharStrings[i])
		# Fails when the charset contains the same glyph name twice.
		assert len(charStrings) == nGlyphs

		# XXX Encoding!
		encoding = self.Encoding
		if encoding not in (0, 1):
			# encoding is an _offset_ from the beginning of 'data' to an encoding subtable
			raise NotImplementedError(
					"custom encodings are not supported yet (Encoding: %r)" % (encoding,))

	def _getGlyphNames(self, data, nGlyphs, strings):
		"""Return the list of glyph names described by the charset."""
		charsetOffset = self.charset
		if charsetOffset in (0, 1, 2):
			# Per the CFF specification these values are not offsets: they
			# select the predefined ISOAdobe, Expert and ExpertSubset charsets.
			raise NotImplementedError(
					"predefined charset %d is not supported yet" % charsetOffset)
		charsetFormat = ord(data[charsetOffset])
		charsetData = data[charsetOffset+1:]
		if charsetFormat == 0:
			# Format 0: one 16-bit SID for every glyph except '.notdef'.
			nSIDs = max(nGlyphs - 1, 0)
			SIDs = struct.unpack(">%dH" % nSIDs, charsetData[:2 * nSIDs])
			return ['.notdef'] + [strings[SID] for SID in SIDs]
		elif charsetFormat == 1:
			return parseCharsetFormat1(nGlyphs, charsetData, strings)
		elif charsetFormat == 2:
			return parseCharsetFormat2(nGlyphs, charsetData, strings)
		raise NotImplementedError("unknown charset format: %d" % charsetFormat)

	def getGlyphOrder(self):
		"""Return the list of glyph names, in glyph order."""
		return self.charset

	def setGlyphOrder(self, glyphOrder):
		"""Replace the list of glyph names."""
		self.charset = glyphOrder

	def decompileAllCharStrings(self):
		"""Decompile every charstring that still needs it."""
		if self.CharstringType == 2:
			# Type 2 CharStrings
			decompiler = psCharStrings.SimpleT2Decompiler(self.Private.Subrs, self.GlobalSubrs)
			for charString in self.CharStrings.values():
				if charString.needsDecompilation():
					decompiler.reset()
					decompiler.execute(charString)
		else:
			# Type 1 CharStrings
			for charString in self.CharStrings.values():
				charString.decompile()

	def toXML(self, xmlWriter, progress=None):
		"""Dump the simple values, the private dict and all charstrings.

		'progress' is optional; when given, its setlabel() and increment()
		methods are called to report what is going on.
		"""
		xmlWriter.newline()
		# first dump the simple values
		self.toXMLSimpleValues(xmlWriter)

		# dump charset
		# XXX

		# decompile all charstrings
		if progress:
			progress.setlabel("Decompiling CharStrings...")
		self.decompileAllCharStrings()

		# dump private dict
		xmlWriter.begintag("Private")
		xmlWriter.newline()
		self.Private.toXML(xmlWriter)
		xmlWriter.endtag("Private")
		xmlWriter.newline()

		self.toXMLCharStrings(xmlWriter, progress)

	def toXMLSimpleValues(self, xmlWriter):
		"""Dump all instance attributes, sorted by name, except the ones
		that are written elsewhere (CharStrings, Private, GlobalSubrs) or
		not at all (charset).
		"""
		skipped = ("CharStrings", "Private", "charset", "GlobalSubrs")
		# Filter instead of list.remove(), which raises ValueError when one
		# of the names has not been set.
		keys = [key for key in self.__dict__.keys() if key not in skipped]
		keys.sort()
		for key in keys:
			value = getattr(self, key)
			if key == "Encoding":
				if value == 0:
					# encoding is (Adobe) Standard Encoding
					value = "StandardEncoding"
				elif value == 1:
					# encoding is Expert Encoding
					value = "ExpertEncoding"
			xmlWriter.begintag(key)
			# Test for toXML() before converting to a string, otherwise this
			# branch can never be taken.
			if hasattr(value, "toXML"):
				xmlWriter.newline()
				value.toXML(xmlWriter)
				xmlWriter.newline()
			else:
				if isinstance(value, list):
					value = " ".join(map(str, value))
				else:
					value = str(value)
				xmlWriter.write(value)
			xmlWriter.endtag(key)
			xmlWriter.newline()
		xmlWriter.newline()

	def toXMLCharStrings(self, xmlWriter, progress=None):
		"""Dump all charstrings, sorted by glyph name."""
		charStrings = self.CharStrings
		xmlWriter.newline()
		xmlWriter.begintag("CharStrings")
		xmlWriter.newline()
		glyphNames = list(charStrings.keys())
		glyphNames.sort()
		for glyphName in glyphNames:
			if progress:
				progress.setlabel("Dumping 'CFF ' table... (%s)" % glyphName)
				progress.increment()
			xmlWriter.newline()
			charString = charStrings[glyphName]
			xmlWriter.begintag("CharString", name=glyphName)
			xmlWriter.newline()
			charString.toXML(xmlWriter)
			xmlWriter.endtag("CharString")
			xmlWriter.newline()
		xmlWriter.newline()
		xmlWriter.endtag("CharStrings")
		xmlWriter.newline()
275
276
class PrivateDict:

	"""The private dict of a font, plus its local subroutines.

	decompile() stores the decoded entries as instance attributes; names
	that are not set on the instance are looked up in 'defaults' by
	__getattr__(). After decompile(), 'Subrs' is a list of T2CharString
	objects (empty when the font has no local subroutines).
	"""

	defaults = psCharStrings.privateDictDefaults

	def __init__(self):
		pass

	def decompile(self, data, privateData, strings):
		"""Decompile the private dict and its local subroutines.

		data -- the CFF data from the start of the private dict onwards; the
			Subrs offset is applied to it
		privateData -- the raw private dict itself
		strings -- object mapping SIDs to strings
		"""
		p = psCharStrings.PrivateDictDecompiler(strings)
		p.decompile(privateData)
		self.fromDict(p.getDict())

		# get local subrs
		# Subrs is optional in a private dict. Look at the decoded entries
		# only, so a font without local subrs gets an empty list instead of
		# an AttributeError or a bogus read.
		subrsOffset = self.__dict__.get("Subrs")
		if subrsOffset is None:
			self.Subrs = []
		else:
			localSubrs = readINDEX(data[subrsOffset:])[0]
			self.Subrs = [psCharStrings.T2CharString(subr) for subr in localSubrs]

	def toXML(self, xmlWriter):
		"""Dump all simple values (sorted by name) and then the local
		subroutines to 'xmlWriter'.
		"""
		xmlWriter.newline()
		keys = [key for key in self.__dict__.keys() if key != "Subrs"]
		# Sort for a reproducible output order.
		keys.sort()
		for key in keys:
			value = getattr(self, key)
			if isinstance(value, list):
				value = " ".join(map(str, value))
			else:
				value = str(value)
			xmlWriter.begintag(key)
			xmlWriter.write(value)
			xmlWriter.endtag(key)
			xmlWriter.newline()
		# write subroutines
		subrs = self.__dict__.get("Subrs", [])
		xmlWriter.newline()
		xmlWriter.begintag("Subrs")
		xmlWriter.newline()
		for i in range(len(subrs)):
			xmlWriter.newline()
			xmlWriter.begintag("CharString", id=i)
			xmlWriter.newline()
			subrs[i].toXML(xmlWriter)
			xmlWriter.endtag("CharString")
			xmlWriter.newline()
		xmlWriter.newline()
		xmlWriter.endtag("Subrs")
		xmlWriter.newline()
		xmlWriter.newline()

	def __getattr__(self, attr):
		# Only called when normal attribute lookup fails: fall back on the
		# private dict defaults.
		try:
			return self.defaults[attr]
		except KeyError:
			raise AttributeError(attr)

	def fromDict(self, dict):
		"""Set every item of 'dict' as an instance attribute."""
		self.__dict__.update(dict)
332
333
def readINDEX(data):
	"""Parse the INDEX structure at the start of 'data'.

	Returns a tuple (items, rest): 'items' is the list of raw data chunks
	stored in the INDEX and 'rest' is whatever follows the INDEX in 'data'.

	Layout: a 2-byte big-endian item count, a 1-byte offset size, count+1
	offsets of that size, then the item data. Offsets are 1-based, relative
	to the byte just before the item data. An empty INDEX consists of the
	count field only.

	Raises struct.error when 'data' is too short to hold the count or the
	offsets, and AssertionError when the offset size is not in 1..4 or an
	item is truncated.
	"""
	count, = struct.unpack(">H", data[:2])
	count = int(count)
	if count == 0:
		# Nothing but the count is stored: do not read an offset size or
		# offsets from whatever happens to follow.
		return [], data[2:]
	# Slicing (instead of indexing) works for str as well as bytes objects.
	offSize, = struct.unpack("B", data[2:3])
	assert 1 <= offSize <= 4, "invalid INDEX offset size: %d" % offSize
	offsetsLength = (count + 1) * offSize
	rawOffsets = struct.unpack("%dB" % offsetsLength, data[3:3 + offsetsLength])
	offsets = []
	for index in range(count + 1):
		# Combine 'offSize' bytes into one big-endian unsigned number.
		offset = 0
		for byte in rawOffsets[index * offSize:(index + 1) * offSize]:
			offset = (offset << 8) | byte
		offsets.append(offset)
	data = data[3 + offsetsLength:]
	items = []
	start = offsets[0]
	for end in offsets[1:]:
		# Offsets are 1-based, hence the -1.
		chunk = data[start-1:end-1]
		assert len(chunk) == end - start
		items.append(chunk)
		start = end
	return items, data[offsets[-1]-1:]
356
357
def parseCharsetFormat1(nGlyphs, data, strings):
	"""Return the glyph names described by the range records in 'data'.

	Each record is 3 bytes: a 2-byte big-endian first SID followed by a
	1-byte 'nLeft', and covers the SIDs first .. first+nLeft. The names are
	looked up in 'strings'. '.notdef' always comes first and is not stored;
	records are read until at least 'nGlyphs' names have been collected.
	"""
	glyphNames = ['.notdef']
	pos = 0
	while len(glyphNames) < nGlyphs:
		first, = struct.unpack(">H", data[pos:pos+2])
		nLeft = ord(data[pos+2])
		pos = pos + 3
		stop = int(first) + nLeft + 1
		glyphNames.extend([strings[SID] for SID in range(int(first), stop)])
	return glyphNames
369
370
def parseCharsetFormat2(nGlyphs, data, strings):
	"""Return the glyph names described by the range records in 'data'.

	Each record is 4 bytes: a 2-byte big-endian first SID followed by a
	2-byte big-endian 'nLeft', and covers the SIDs first .. first+nLeft.
	The names are looked up in 'strings'. '.notdef' always comes first and
	is not stored; records are read until at least 'nGlyphs' names have
	been collected.
	"""
	glyphNames = ['.notdef']
	pos = 0
	while len(glyphNames) < nGlyphs:
		first, nLeft = struct.unpack(">HH", data[pos:pos+4])
		pos = pos + 4
		stop = int(first) + int(nLeft) + 1
		glyphNames.extend([strings[SID] for SID in range(int(first), stop)])
	return glyphNames
382
383
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998
386
# The position of a string in this list is its SID.
cffStandardStrings = ['.notdef', 'space', 'exclam', 'quotedbl', 'numbersign',
		'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft', 'parenright',
		'asterisk', 'plus', 'comma', 'hyphen', 'period', 'slash', 'zero', 'one',
		'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon',
		'semicolon', 'less', 'equal', 'greater', 'question', 'at', 'A', 'B', 'C',
		'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
		'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash',
		'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c',
		'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
		's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright',
		'asciitilde', 'exclamdown', 'cent', 'sterling', 'fraction', 'yen', 'florin',
		'section', 'currency', 'quotesingle', 'quotedblleft', 'guillemotleft',
		'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'endash', 'dagger',
		'daggerdbl', 'periodcentered', 'paragraph', 'bullet', 'quotesinglbase',
		'quotedblbase', 'quotedblright', 'guillemotright', 'ellipsis', 'perthousand',
		'questiondown', 'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve',
		'dotaccent', 'dieresis', 'ring', 'cedilla', 'hungarumlaut', 'ogonek', 'caron',
		'emdash', 'AE', 'ordfeminine', 'Lslash', 'Oslash', 'OE', 'ordmasculine', 'ae',
		'dotlessi', 'lslash', 'oslash', 'oe', 'germandbls', 'onesuperior',
		'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf', 'plusminus', 'Thorn',
		'onequarter', 'divide', 'brokenbar', 'degree', 'thorn', 'threequarters',
		'twosuperior', 'registered', 'minus', 'eth', 'multiply', 'threesuperior',
		'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave', 'Aring',
		'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
		'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
		'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
		'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
		'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
		'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
		'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
		'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
		'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
		'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
		'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
		'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
		'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
		'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
		'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
		'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
		'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
		'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
		'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
		'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
		'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
		'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
		'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
		'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
		'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
		'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths', 'seveneighths',
		'onethird', 'twothirds', 'zerosuperior', 'foursuperior', 'fivesuperior',
		'sixsuperior', 'sevensuperior', 'eightsuperior', 'ninesuperior', 'zeroinferior',
		'oneinferior', 'twoinferior', 'threeinferior', 'fourinferior', 'fiveinferior',
		'sixinferior', 'seveninferior', 'eightinferior', 'nineinferior', 'centinferior',
		'dollarinferior', 'periodinferior', 'commainferior', 'Agravesmall',
		'Aacutesmall', 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall',
		'AEsmall', 'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
		'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
		'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
		'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
		'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
		'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
		'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
		'Semibold'
]

# SIDs below this number refer to the table above.
cffStandardStringCount = 391
assert len(cffStandardStrings) == cffStandardStringCount
# reverse mapping: standard string -> SID
cffStandardStringMapping = dict(zip(cffStandardStrings, range(cffStandardStringCount)))
458
459
460