__init__.py revision 0011bb691018c7feed1f8c3554d88b9ae096e127
1"""fontTools.ttLib -- a package for dealing with TrueType fonts.
2
3This package offers translators to convert TrueType fonts to Python
4objects and vice versa, and additionally from Python to TTX (an XML-based
5text format) and vice versa.
6
7Example interactive session:
8
9Python 1.5.2c1 (#43, Mar  9 1999, 13:06:43)  [CW PPC w/GUSI w/MSL]
10Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam
11>>> from fontTools import ttLib
12>>> tt = ttLib.TTFont("afont.ttf")
13>>> tt['maxp'].numGlyphs
14242
15>>> tt['OS/2'].achVendID
16'B&H\000'
17>>> tt['head'].unitsPerEm
182048
19>>> tt.saveXML("afont.ttx")
20Dumping 'LTSH' table...
21Dumping 'OS/2' table...
22Dumping 'VDMX' table...
23Dumping 'cmap' table...
24Dumping 'cvt ' table...
25Dumping 'fpgm' table...
26Dumping 'glyf' table...
27Dumping 'hdmx' table...
28Dumping 'head' table...
29Dumping 'hhea' table...
30Dumping 'hmtx' table...
31Dumping 'loca' table...
32Dumping 'maxp' table...
33Dumping 'name' table...
34Dumping 'post' table...
35Dumping 'prep' table...
36>>> tt2 = ttLib.TTFont()
37>>> tt2.importXML("afont.ttx")
38>>> tt2['maxp'].numGlyphs
39242
40>>>
41
42"""
43
44#
45# $Id: __init__.py,v 1.30 2002-05-23 09:42:45 jvr Exp $
46#
47
48import os
49import string
50import types
51
52
class TTLibError(Exception):
	"""Exception class defined by the ttLib package."""
	pass
54
55
56class TTFont:
57
58	"""The main font object. It manages file input and output, and offers
59	a convenient way of accessing tables.
	Tables will only be decompiled when necessary, i.e. when they're actually
61	accessed. This means that simple operations can be extremely fast.
62	"""
63
64	def __init__(self, file=None, res_name_or_index=None,
65			sfntVersion="\000\001\000\000", checkChecksums=0,
66			verbose=0, recalcBBoxes=1):
67
68		"""The constructor can be called with a few different arguments.
69		When reading a font from disk, 'file' should be either a pathname
70		pointing to a file, or a readable file object.
71
72		It we're running on a Macintosh, 'res_name_or_index' maybe an sfnt
73		resource name or an sfnt resource index number or zero. The latter
74		case will cause TTLib to autodetect whether the file is a flat file
75		or a suitcase. (If it's a suitcase, only the first 'sfnt' resource
76		will be read!)
77
78		The 'checkChecksums' argument is used to specify how sfnt
79		checksums are treated upon reading a file from disk:
80			0: don't check (default)
81			1: check, print warnings if a wrong checksum is found
82			2: check, raise an exception if a wrong checksum is found.
83
84		The TTFont constructor can also be called without a 'file'
85		argument: this is the way to create a new empty font.
86		In this case you can optionally supply the 'sfntVersion' argument.
87
88		If the recalcBBoxes argument is false, a number of things will *not*
89		be recalculated upon save/compile:
90			1) glyph bounding boxes
91			2) maxp font bounding box
92			3) hhea min/max values
93		(1) is needed for certain kinds of CJK fonts (ask Werner Lemberg ;-).
94		Additionally, upon importing an TTX file, this option cause glyphs
95		to be compiled right away. This should reduce memory consumption
96		greatly, and therefore should have some impact on the time needed
97		to parse/compile large fonts.
98		"""
99
100		import sfnt
101		self.verbose = verbose
102		self.recalcBBoxes = recalcBBoxes
103		self.tables = {}
104		self.reader = None
105		if not file:
106			self.sfntVersion = sfntVersion
107			return
108		if type(file) == types.StringType:
109			if os.name == "mac" and res_name_or_index is not None:
110				# on the mac, we deal with sfnt resources as well as flat files
111				import macUtils
112				if res_name_or_index == 0:
113					if macUtils.getSFNTResIndices(file):
114						# get the first available sfnt font.
115						file = macUtils.SFNTResourceReader(file, 1)
116					else:
117						file = open(file, "rb")
118				else:
119					file = macUtils.SFNTResourceReader(file, res_name_or_index)
120			else:
121				file = open(file, "rb")
122		else:
123			pass # assume "file" is a readable file object
124		self.reader = sfnt.SFNTReader(file, checkChecksums)
125		self.sfntVersion = self.reader.sfntVersion
126
127	def close(self):
128		"""If we still have a reader object, close it."""
129		if self.reader is not None:
130			self.reader.close()
131
132	def save(self, file, makeSuitcase=0):
133		"""Save the font to disk. Similarly to the constructor,
134		the 'file' argument can be either a pathname or a writable
135		file object.
136
137		On the Mac, if makeSuitcase is true, a suitcase (resource fork)
138		file will we made instead of a flat .ttf file.
139		"""
140		from fontTools.ttLib import sfnt
141		if type(file) == types.StringType:
142			closeStream = 1
143			if os.name == "mac" and makeSuitcase:
144				import macUtils
145				file = macUtils.SFNTResourceWriter(file, self)
146			else:
147				file = open(file, "wb")
148				if os.name == "mac":
149					import macfs
150					fss = macfs.FSSpec(file.name)
151					fss.SetCreatorType('mdos', 'BINA')
152		else:
153			# assume "file" is a writable file object
154			closeStream = 0
155
156		tags = self.keys()
157		tags.remove("GlyphOrder")
158		numTables = len(tags)
159		writer = sfnt.SFNTWriter(file, numTables, self.sfntVersion)
160
161		done = []
162		for tag in tags:
163			self._writeTable(tag, writer, done)
164
165		writer.close(closeStream)
166
167	def saveXML(self, fileOrPath, progress=None,
168			tables=None, skipTables=None, splitTables=0, disassembleInstructions=1):
169		"""Export the font as TTX (an XML-based text file), or as a series of text
170		files when splitTables is true. In the latter case, the 'fileOrPath'
171		argument should be a path to a directory.
172		The 'tables' argument must either be false (dump all tables) or a
173		list of tables to dump. The 'skipTables' argument may be a list of tables
174		to skip, but only when the 'tables' argument is false.
175		"""
176		from fontTools import version
177		import xmlWriter
178
179		self.disassembleInstructions = disassembleInstructions
180		if not tables:
181			tables = self.keys()
182			if skipTables:
183				for tag in skipTables:
184					if tag in tables:
185						tables.remove(tag)
186		numTables = len(tables)
187		numGlyphs = self['maxp'].numGlyphs
188		if progress:
189			progress.set(0, numTables * numGlyphs)
190
191		writer = xmlWriter.XMLWriter(fileOrPath)
192		writer.begintag("ttFont", sfntVersion=`self.sfntVersion`[1:-1],
193				ttLibVersion=version)
194		writer.newline()
195
196		if not splitTables:
197			writer.newline()
198		else:
199			# 'fileOrPath' must now be a path
200			path, ext = os.path.splitext(fileOrPath)
201			fileNameTemplate = path + ".%s" + ext
202
203		for i in range(numTables):
204			tag = tables[i]
205			if splitTables:
206				tablePath = fileNameTemplate % tagToIdentifier(tag)
207				tableWriter = xmlWriter.XMLWriter(tablePath)
208				tableWriter.begintag("ttFont", ttLibVersion=version)
209				tableWriter.newline()
210				tableWriter.newline()
211				writer.simpletag(tagToXML(tag), src=os.path.basename(tablePath))
212				writer.newline()
213			else:
214				tableWriter = writer
215			self._tableToXML(tableWriter, tag, progress)
216			if splitTables:
217				tableWriter.endtag("ttFont")
218				tableWriter.newline()
219				tableWriter.close()
220			if progress:
221				progress.set(i * numGlyphs, numTables * numGlyphs)
222		writer.endtag("ttFont")
223		writer.newline()
224		writer.close()
225		if self.verbose:
226			debugmsg("Done dumping TTX")
227
228	def _tableToXML(self, writer, tag, progress):
229		if self.has_key(tag):
230			table = self[tag]
231			report = "Dumping '%s' table..." % tag
232		else:
233			report = "No '%s' table found." % tag
234		if progress:
235			progress.setlabel(report)
236		elif self.verbose:
237			debugmsg(report)
238		else:
239			print report
240		if not self.has_key(tag):
241			return
242		xmlTag = tagToXML(tag)
243		if hasattr(table, "ERROR"):
244			writer.begintag(xmlTag, ERROR="decompilation error")
245		else:
246			writer.begintag(xmlTag)
247		writer.newline()
248		if tag in ("glyf", "CFF "):
249			table.toXML(writer, self, progress)
250		else:
251			table.toXML(writer, self)
252		writer.endtag(xmlTag)
253		writer.newline()
254		writer.newline()
255
256	def importXML(self, file, progress=None):
257		"""Import a TTX file (an XML-based text format), so as to recreate
258		a font object.
259		"""
260		import xmlImport
261		xmlImport.importXML(self, file, progress)
262
263	def isLoaded(self, tag):
264		"""Return true if the table identified by 'tag' has been
265		decompiled and loaded into memory."""
266		return self.tables.has_key(tag)
267
268	def has_key(self, tag):
269		if self.isLoaded(tag):
270			return 1
271		elif self.reader and self.reader.has_key(tag):
272			return 1
273		elif tag == "GlyphOrder":
274			return 1
275		else:
276			return 0
277
278	def keys(self):
279		keys = self.tables.keys()
280		if self.reader:
281			for key in self.reader.keys():
282				if key not in keys:
283					keys.append(key)
284		keys.sort()
285		if "GlyphOrder" in keys:
286			keys.remove("GlyphOrder")
287		return ["GlyphOrder"] + keys
288
289	def __len__(self):
290		return len(self.keys())
291
292	def __getitem__(self, tag):
293		try:
294			return self.tables[tag]
295		except KeyError:
296			if tag == "GlyphOrder":
297				table = GlyphOrder(tag)
298				self.tables[tag] = table
299				return table
300			if self.reader is not None:
301				import traceback
302				if self.verbose:
303					debugmsg("reading '%s' table from disk" % tag)
304				data = self.reader[tag]
305				tableClass = getTableClass(tag)
306				table = tableClass(tag)
307				self.tables[tag] = table
308				if self.verbose:
309					debugmsg("decompiling '%s' table" % tag)
310				try:
311					table.decompile(data, self)
312				except "_ _ F O O _ _": # dummy exception to disable exception catching
313					print "An exception occurred during the decompilation of the '%s' table" % tag
314					from tables.DefaultTable import DefaultTable
315					import StringIO
316					file = StringIO.StringIO()
317					traceback.print_exc(file=file)
318					table = DefaultTable(tag)
319					table.ERROR = file.getvalue()
320					self.tables[tag] = table
321					table.decompile(data, self)
322				return table
323			else:
324				raise KeyError, "'%s' table not found" % tag
325
326	def __setitem__(self, tag, table):
327		self.tables[tag] = table
328
329	def __delitem__(self, tag):
330		if not self.has_key(tag):
331			raise KeyError, "'%s' table not found" % tag
332		if self.tables.has_key(tag):
333			del self.tables[tag]
334		if self.reader and self.reader.has_key(tag):
335			del self.reader[tag]
336
337	def setGlyphOrder(self, glyphOrder):
338		self.glyphOrder = glyphOrder
339		if self.has_key('CFF '):
340			self['CFF '].setGlyphOrder(glyphOrder)
341		if self.has_key('glyf'):
342			self['glyf'].setGlyphOrder(glyphOrder)
343
344	def getGlyphOrder(self):
345		try:
346			return self.glyphOrder
347		except AttributeError:
348			pass
349		if self.has_key('CFF '):
350			cff = self['CFF ']
351			if cff.haveGlyphNames():
352				self.glyphOrder = cff.getGlyphOrder()
353			else:
354				# CID-keyed font, use cmap
355				self._getGlyphNamesFromCmap()
356		elif self.has_key('post'):
357			# TrueType font
358			glyphOrder = self['post'].getGlyphOrder()
359			if glyphOrder is None:
360				#
361				# No names found in the 'post' table.
362				# Try to create glyph names from the unicode cmap (if available)
363				# in combination with the Adobe Glyph List (AGL).
364				#
365				self._getGlyphNamesFromCmap()
366			else:
367				self.glyphOrder = glyphOrder
368		else:
369			self._getGlyphNamesFromCmap()
370		return self.glyphOrder
371
372	def _getGlyphNamesFromCmap(self):
373		#
374		# This is rather convoluted, but then again, it's an interesting problem:
375		# - we need to use the unicode values found in the cmap table to
376		#   build glyph names (eg. because there is only a minimal post table,
377		#   or none at all).
378		# - but the cmap parser also needs glyph names to work with...
379		# So here's what we do:
380		# - make up glyph names based on glyphID
381		# - load a temporary cmap table based on those names
382		# - extract the unicode values, build the "real" glyph names
383		# - unload the temporary cmap table
384		#
385		if self.isLoaded("cmap"):
386			# Bootstrapping: we're getting called by the cmap parser
387			# itself. This means self.tables['cmap'] contains a partially
388			# loaded cmap, making it impossible to get at a unicode
389			# subtable here. We remove the partially loaded cmap and
390			# restore it later.
391			# This only happens if the cmap table is loaded before any
392			# other table that does f.getGlyphOrder()  or f.getGlyphName().
393			cmapLoading = self.tables['cmap']
394			del self.tables['cmap']
395		else:
396			cmapLoading = None
397		# Make up glyph names based on glyphID, which will be used by the
398		# temporary cmap and by the real cmap in case we don't find a unicode
399		# cmap.
400		numGlyphs = int(self['maxp'].numGlyphs)
401		glyphOrder = [None] * numGlyphs
402		glyphOrder[0] = ".notdef"
403		for i in range(1, numGlyphs):
404			glyphOrder[i] = "glyph%.5d" % i
405		# Set the glyph order, so the cmap parser has something
406		# to work with (so we don't get called recursively).
407		self.glyphOrder = glyphOrder
408		# Get a (new) temporary cmap (based on the just invented names)
409		tempcmap = self['cmap'].getcmap(3, 1)
410		if tempcmap is not None:
411			# we have a unicode cmap
412			from fontTools import agl
413			cmap = tempcmap.cmap
414			# create a reverse cmap dict
415			reversecmap = {}
416			for unicode, name in cmap.items():
417				reversecmap[name] = unicode
418			allNames = {}
419			for i in range(numGlyphs):
420				tempName = glyphOrder[i]
421				if reversecmap.has_key(tempName):
422					unicode = reversecmap[tempName]
423					if agl.UV2AGL.has_key(unicode):
424						# get name from the Adobe Glyph List
425						glyphName = agl.UV2AGL[unicode]
426					else:
427						# create uni<CODE> name
428						glyphName = "uni" + string.upper(string.zfill(
429								hex(unicode)[2:], 4))
430					tempName = glyphName
431					n = 1
432					while allNames.has_key(tempName):
433						tempName = glyphName + "#" + `n`
434						n = n + 1
435					glyphOrder[i] = tempName
436					allNames[tempName] = 1
437			# Delete the temporary cmap table from the cache, so it can
438			# be parsed again with the right names.
439			del self.tables['cmap']
440		else:
441			pass # no unicode cmap available, stick with the invented names
442		self.glyphOrder = glyphOrder
443		if cmapLoading:
444			# restore partially loaded cmap, so it can continue loading
445			# using the proper names.
446			self.tables['cmap'] = cmapLoading
447
448	def getGlyphNames(self):
449		"""Get a list of glyph names, sorted alphabetically."""
450		glyphNames = self.getGlyphOrder()[:]
451		glyphNames.sort()
452		return glyphNames
453
454	def getGlyphNames2(self):
455		"""Get a list of glyph names, sorted alphabetically,
456		but not case sensitive.
457		"""
458		from fontTools.misc import textTools
459		return textTools.caselessSort(self.getGlyphOrder())
460
461	def getGlyphName(self, glyphID):
462		try:
463			return self.getGlyphOrder()[glyphID]
464		except IndexError:
465			# XXX The ??.W8.otf font that ships with OSX uses higher glyphIDs in
466			# the cmap table than there are glyphs. I don't think it's legal...
467			return "glyph%.5d" % glyphID
468
469	def getGlyphID(self, glyphName):
470		if not hasattr(self, "_reverseGlyphOrderDict"):
471			self._buildReverseGlyphOrderDict()
472		glyphOrder = self.getGlyphOrder()
473		d = self._reverseGlyphOrderDict
474		if not d.has_key(glyphName):
475			if glyphName in glyphOrder:
476				self._buildReverseGlyphOrderDict()
477				return self.getGlyphID(glyphName)
478			else:
479				raise KeyError, glyphName
480		glyphID = d[glyphName]
481		if glyphName <> glyphOrder[glyphID]:
482			self._buildReverseGlyphOrderDict()
483			return self.getGlyphID(glyphName)
484		return glyphID
485
486	def _buildReverseGlyphOrderDict(self):
487		self._reverseGlyphOrderDict = d = {}
488		glyphOrder = self.getGlyphOrder()
489		for glyphID in range(len(glyphOrder)):
490			d[glyphOrder[glyphID]] = glyphID
491
492	def _writeTable(self, tag, writer, done):
493		"""Internal helper function for self.save(). Keeps track of
494		inter-table dependencies.
495		"""
496		if tag in done:
497			return
498		tableClass = getTableClass(tag)
499		for masterTable in tableClass.dependencies:
500			if masterTable not in done:
501				if self.has_key(masterTable):
502					self._writeTable(masterTable, writer, done)
503				else:
504					done.append(masterTable)
505		tabledata = self.getTableData(tag)
506		if self.verbose:
507			debugmsg("writing '%s' table to disk" % tag)
508		writer[tag] = tabledata
509		done.append(tag)
510
511	def getTableData(self, tag):
512		"""Returns raw table data, whether compiled or directly read from disk.
513		"""
514		if self.isLoaded(tag):
515			if self.verbose:
516				debugmsg("compiling '%s' table" % tag)
517			return self.tables[tag].compile(self)
518		elif self.reader and self.reader.has_key(tag):
519			if self.verbose:
520				debugmsg("reading '%s' table from disk" % tag)
521			return self.reader[tag]
522		else:
523			raise KeyError, tag
524
525
526class GlyphOrder:
527
528	"""A fake table. The glyph order isn't in the font as a separate table,
529	but it's nice to present it as such in the TTX format.
530	"""
531
532	def __init__(self, tag):
533		pass
534
535	def toXML(self, writer, ttFont):
536		glyphOrder = ttFont.getGlyphOrder()
537		writer.comment("The 'id' attribute is merely a reading aid; "
538				"it is ignored when read.")
539		writer.newline()
540		for i in range(len(glyphOrder)):
541			glyphName = glyphOrder[i]
542			writer.simpletag("GlyphID", id=i, name=glyphName)
543			writer.newline()
544
545	def fromXML(self, (name, attrs, content), ttFont):
546		if not hasattr(self, "glyphOrder"):
547			self.glyphOrder = []
548			ttFont.setGlyphOrder(self.glyphOrder)
549		if name == "GlyphID":
550			self.glyphOrder.append(attrs["name"])
551
552
def _test_endianness():
	"""Test the endianness of the machine. This is crucial to know
	since TrueType data is always big endian, even on little endian
	machines. There are quite a few situations where we explicitly
	need to swap some bytes.

	Returns "big" or "little", as reported by sys.byteorder.
	"""
	import sys
	byteorder = sys.byteorder
	assert byteorder in ("big", "little"), "endian confusion!"
	return byteorder

endian = _test_endianness()
569
570
def getTableModule(tag):
	"""Fetch the packer/unpacker module for a table.
	The module is looked up in the 'tables' package under the name
	tagToIdentifier(tag). Return None when no module is found.
	"""
	import imp
	import tables
	moduleName = tagToIdentifier(tag)
	try:
		# Only used to find out whether the module exists at all.
		fileObj, pathName, description = imp.find_module(moduleName, tables.__path__)
		if fileObj:
			fileObj.close()
	except ImportError:
		return None
	# Importing by dotted name makes the module available as an attribute
	# of the 'tables' package.
	__import__("fontTools.ttLib.tables." + moduleName)
	return getattr(tables, moduleName)
587
588
def getTableClass(tag):
	"""Fetch the packer/unpacker class for a table.
	When there is no module for the tag, the generic DefaultTable
	class is returned instead.
	"""
	module = getTableModule(tag)
	if module is not None:
		# the class is called "table_" + the module's name
		return getattr(module, "table_" + tagToIdentifier(tag))
	from tables.DefaultTable import DefaultTable
	return DefaultTable
600
601
def newTable(tag):
	"""Create and return a new table object for 'tag', using the class
	found by getTableClass()."""
	return getTableClass(tag)(tag)
606
607
def _escapechar(c):
	"""Helper function for tagToIdentifier(): map one character to
	exactly two characters."""
	if "a" <= c <= "z" or "0" <= c <= "9":
		return "_" + c
	elif "A" <= c <= "Z":
		return c + "_"
	else:
		# Always two hex digits, also for character codes below 0x10;
		# identifierToTag() reads the identifier two characters at a time.
		return "%02x" % ord(c)


def tagToIdentifier(tag):
	"""Convert a table tag to a valid (but UGLY) python identifier,
	as well as a filename that's guaranteed to be unique even on a
	caseless file system. Each character is mapped to two characters.
	Lowercase letters and digits get an underscore before the character,
	uppercase letters get an underscore after the letter. Trailing spaces
	are trimmed (but at least one character is kept). All other
	characters are escaped as two hex digits. If the result starts with
	a digit (as the result of a hex escape), an extra underscore is
	prepended. Examples:
		'glyf' -> '_g_l_y_f'
		'cvt ' -> '_c_v_t'
		'OS/2' -> 'O_S_2f_2'
	"""
	assert len(tag) == 4, "tag should be 4 characters long"
	# An all-space tag is cut down to a single space, not to nothing.
	tag = tag.rstrip(' ') or ' '
	ident = "".join([_escapechar(c) for c in tag])
	if ident[0] in "0123456789":
		ident = "_" + ident
	return ident
642
643
def identifierToTag(ident):
	"""The opposite of tagToIdentifier(): turn an identifier back into
	a tag, padded with trailing spaces to 4 characters."""
	if len(ident) % 2 and ident[0] == "_":
		# drop the extra underscore that guards a leading digit
		ident = ident[1:]
	assert not (len(ident) % 2)
	chars = []
	# every character of the tag takes up two characters in the identifier
	for i in range(0, len(ident), 2):
		first, second = ident[i], ident[i+1]
		if first == "_":
			chars.append(second)
		elif second == "_":
			chars.append(first)
		else:
			# assume hex
			chars.append(chr(int(ident[i:i+2], 16)))
	tag = "".join(chars)
	# append trailing spaces
	return tag + (4 - len(tag)) * ' '
661
662
def tagToXML(tag):
	"""Similarly to tagToIdentifier(), this converts a TT tag
	to a valid XML element name. Since XML element names are
	case sensitive, this is a fairly simple/readable translation:
	'OS/2' becomes 'OS_2', 'GlyphOrder' stays as it is, a tag that
	already looks like a name is merely stripped of its padding, and
	anything else goes through tagToIdentifier().
	"""
	import re
	if tag == "OS/2":
		return "OS_2"
	if tag == "GlyphOrder":
		return "GlyphOrder"
	if re.match("[A-Za-z_][A-Za-z_0-9]* *$", tag):
		return tag.strip()
	return tagToIdentifier(tag)
677
678
def xmlToTag(tag):
	"""The opposite of tagToXML()"""
	if tag == "OS_2":
		return "OS/2"
	if len(tag) == 8:
		# 8 characters: an identifier made by tagToIdentifier()
		return identifierToTag(tag)
	# Pad short names with spaces to 4 characters; longer names (such as
	# "GlyphOrder") come through unchanged, as a negative count pads nothing.
	return tag + " " * (4 - len(tag))
688
689
def debugmsg(msg):
	"""Print 'msg', followed by the current local time as (HH:MM:SS)."""
	import time
	timestamp = time.strftime("  (%H:%M:%S)", time.localtime(time.time()))
	print(msg + timestamp)
693
694