__init__.py revision d57c4346e5725bbfe0e1b13067b292c3faaaeb8e
1"""fontTools.ttLib -- a package for dealing with TrueType fonts.
2
3This package offers translators to convert TrueType fonts to Python
4objects and vice versa, and additionally from Python to TTX (an XML-based
5text format) and vice versa.
6
7Example interactive session:
8
9Python 1.5.2c1 (#43, Mar  9 1999, 13:06:43)  [CW PPC w/GUSI w/MSL]
10Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam
11>>> from fontTools import ttLib
12>>> tt = ttLib.TTFont("afont.ttf")
13>>> tt['maxp'].numGlyphs
14242
15>>> tt['OS/2'].achVendID
16'B&H\000'
17>>> tt['head'].unitsPerEm
182048
19>>> tt.saveXML("afont.ttx")
20Dumping 'LTSH' table...
21Dumping 'OS/2' table...
22Dumping 'VDMX' table...
23Dumping 'cmap' table...
24Dumping 'cvt ' table...
25Dumping 'fpgm' table...
26Dumping 'glyf' table...
27Dumping 'hdmx' table...
28Dumping 'head' table...
29Dumping 'hhea' table...
30Dumping 'hmtx' table...
31Dumping 'loca' table...
32Dumping 'maxp' table...
33Dumping 'name' table...
34Dumping 'post' table...
35Dumping 'prep' table...
36>>> tt2 = ttLib.TTFont()
37>>> tt2.importXML("afont.ttx")
38>>> tt2['maxp'].numGlyphs
39242
40>>>
41
42"""
43
44#
45# $Id: __init__.py,v 1.34 2002-05-25 15:28:48 jvr Exp $
46#
47
48import os
49import string
50import types
51
52
class TTLibError(Exception):
	"""Exception class defined by ttLib for its own error conditions."""
54
55
56class TTFont:
57
58	"""The main font object. It manages file input and output, and offers
59	a convenient way of accessing tables.
60	Tables will only be decompiled when necessary, i.e. when they're actually
61	accessed. This means that simple operations can be extremely fast.
62	"""
63
64	def __init__(self, file=None, res_name_or_index=None,
65			sfntVersion="\000\001\000\000", checkChecksums=0,
66			verbose=0, recalcBBoxes=1):
67
68		"""The constructor can be called with a few different arguments.
69		When reading a font from disk, 'file' should be either a pathname
70		pointing to a file, or a readable file object.
71
72		It we're running on a Macintosh, 'res_name_or_index' maybe an sfnt
73		resource name or an sfnt resource index number or zero. The latter
74		case will cause TTLib to autodetect whether the file is a flat file
75		or a suitcase. (If it's a suitcase, only the first 'sfnt' resource
76		will be read!)
77
78		The 'checkChecksums' argument is used to specify how sfnt
79		checksums are treated upon reading a file from disk:
80			0: don't check (default)
81			1: check, print warnings if a wrong checksum is found
82			2: check, raise an exception if a wrong checksum is found.
83
84		The TTFont constructor can also be called without a 'file'
85		argument: this is the way to create a new empty font.
86		In this case you can optionally supply the 'sfntVersion' argument.
87
88		If the recalcBBoxes argument is false, a number of things will *not*
89		be recalculated upon save/compile:
90			1) glyph bounding boxes
91			2) maxp font bounding box
92			3) hhea min/max values
93		(1) is needed for certain kinds of CJK fonts (ask Werner Lemberg ;-).
94		Additionally, upon importing an TTX file, this option cause glyphs
95		to be compiled right away. This should reduce memory consumption
96		greatly, and therefore should have some impact on the time needed
97		to parse/compile large fonts.
98		"""
99
100		import sfnt
101		self.verbose = verbose
102		self.recalcBBoxes = recalcBBoxes
103		self.tables = {}
104		self.reader = None
105		if not file:
106			self.sfntVersion = sfntVersion
107			return
108		if type(file) == types.StringType:
109			if os.name == "mac" and res_name_or_index is not None:
110				# on the mac, we deal with sfnt resources as well as flat files
111				import macUtils
112				if res_name_or_index == 0:
113					if macUtils.getSFNTResIndices(file):
114						# get the first available sfnt font.
115						file = macUtils.SFNTResourceReader(file, 1)
116					else:
117						file = open(file, "rb")
118				else:
119					file = macUtils.SFNTResourceReader(file, res_name_or_index)
120			else:
121				file = open(file, "rb")
122		else:
123			pass # assume "file" is a readable file object
124		self.reader = sfnt.SFNTReader(file, checkChecksums)
125		self.sfntVersion = self.reader.sfntVersion
126
127	def close(self):
128		"""If we still have a reader object, close it."""
129		if self.reader is not None:
130			self.reader.close()
131
132	def save(self, file, makeSuitcase=0):
133		"""Save the font to disk. Similarly to the constructor,
134		the 'file' argument can be either a pathname or a writable
135		file object.
136
137		On the Mac, if makeSuitcase is true, a suitcase (resource fork)
138		file will we made instead of a flat .ttf file.
139		"""
140		from fontTools.ttLib import sfnt
141		if type(file) == types.StringType:
142			closeStream = 1
143			if os.name == "mac" and makeSuitcase:
144				import macUtils
145				file = macUtils.SFNTResourceWriter(file, self)
146			else:
147				file = open(file, "wb")
148				if os.name == "mac":
149					import macfs
150					fss = macfs.FSSpec(file.name)
151					fss.SetCreatorType('mdos', 'BINA')
152		else:
153			# assume "file" is a writable file object
154			closeStream = 0
155
156		tags = self.keys()
157		tags.remove("GlyphOrder")
158		numTables = len(tags)
159		writer = sfnt.SFNTWriter(file, numTables, self.sfntVersion)
160
161		done = []
162		for tag in tags:
163			self._writeTable(tag, writer, done)
164
165		writer.close(closeStream)
166
167	def saveXML(self, fileOrPath, progress=None,
168			tables=None, skipTables=None, splitTables=0, disassembleInstructions=1):
169		"""Export the font as TTX (an XML-based text file), or as a series of text
170		files when splitTables is true. In the latter case, the 'fileOrPath'
171		argument should be a path to a directory.
172		The 'tables' argument must either be false (dump all tables) or a
173		list of tables to dump. The 'skipTables' argument may be a list of tables
174		to skip, but only when the 'tables' argument is false.
175		"""
176		from fontTools import version
177		import xmlWriter
178
179		self.disassembleInstructions = disassembleInstructions
180		if not tables:
181			tables = self.keys()
182			if skipTables:
183				for tag in skipTables:
184					if tag in tables:
185						tables.remove(tag)
186		numTables = len(tables)
187		numGlyphs = self['maxp'].numGlyphs
188		if progress:
189			progress.set(0, numTables * numGlyphs)
190
191		writer = xmlWriter.XMLWriter(fileOrPath)
192		writer.begintag("ttFont", sfntVersion=`self.sfntVersion`[1:-1],
193				ttLibVersion=version)
194		writer.newline()
195
196		if not splitTables:
197			writer.newline()
198		else:
199			# 'fileOrPath' must now be a path
200			path, ext = os.path.splitext(fileOrPath)
201			fileNameTemplate = path + ".%s" + ext
202
203		for i in range(numTables):
204			tag = tables[i]
205			if splitTables:
206				tablePath = fileNameTemplate % tagToIdentifier(tag)
207				tableWriter = xmlWriter.XMLWriter(tablePath)
208				tableWriter.begintag("ttFont", ttLibVersion=version)
209				tableWriter.newline()
210				tableWriter.newline()
211				writer.simpletag(tagToXML(tag), src=os.path.basename(tablePath))
212				writer.newline()
213			else:
214				tableWriter = writer
215			self._tableToXML(tableWriter, tag, progress)
216			if splitTables:
217				tableWriter.endtag("ttFont")
218				tableWriter.newline()
219				tableWriter.close()
220			if progress:
221				progress.set(i * numGlyphs, numTables * numGlyphs)
222		writer.endtag("ttFont")
223		writer.newline()
224		writer.close()
225		if self.verbose:
226			debugmsg("Done dumping TTX")
227
228	def _tableToXML(self, writer, tag, progress):
229		if self.has_key(tag):
230			table = self[tag]
231			report = "Dumping '%s' table..." % tag
232		else:
233			report = "No '%s' table found." % tag
234		if progress:
235			progress.setlabel(report)
236		elif self.verbose:
237			debugmsg(report)
238		else:
239			print report
240		if not self.has_key(tag):
241			return
242		xmlTag = tagToXML(tag)
243		if hasattr(table, "ERROR"):
244			writer.begintag(xmlTag, ERROR="decompilation error")
245		else:
246			writer.begintag(xmlTag)
247		writer.newline()
248		if tag in ("glyf", "CFF "):
249			table.toXML(writer, self, progress)
250		else:
251			table.toXML(writer, self)
252		writer.endtag(xmlTag)
253		writer.newline()
254		writer.newline()
255
256	def importXML(self, file, progress=None):
257		"""Import a TTX file (an XML-based text format), so as to recreate
258		a font object.
259		"""
260		if self.has_key("maxp") and self.has_key("post"):
261			# Make sure the glyph order is loaded, as it otherwise gets
262			# lost if the XML doesn't contain the glyph order, yet does
263			# contain the table which was originally used to extract the
264			# glyph names from (ie. 'post', 'cmap' or 'CFF ').
265			self.getGlyphOrder()
266		import xmlImport
267		xmlImport.importXML(self, file, progress)
268
269	def isLoaded(self, tag):
270		"""Return true if the table identified by 'tag' has been
271		decompiled and loaded into memory."""
272		return self.tables.has_key(tag)
273
274	def has_key(self, tag):
275		if self.isLoaded(tag):
276			return 1
277		elif self.reader and self.reader.has_key(tag):
278			return 1
279		elif tag == "GlyphOrder":
280			return 1
281		else:
282			return 0
283
284	def keys(self):
285		keys = self.tables.keys()
286		if self.reader:
287			for key in self.reader.keys():
288				if key not in keys:
289					keys.append(key)
290		keys.sort()
291		if "GlyphOrder" in keys:
292			keys.remove("GlyphOrder")
293		return ["GlyphOrder"] + keys
294
295	def __len__(self):
296		return len(self.keys())
297
298	def __getitem__(self, tag):
299		try:
300			return self.tables[tag]
301		except KeyError:
302			if tag == "GlyphOrder":
303				table = GlyphOrder(tag)
304				self.tables[tag] = table
305				return table
306			if self.reader is not None:
307				import traceback
308				if self.verbose:
309					debugmsg("reading '%s' table from disk" % tag)
310				data = self.reader[tag]
311				tableClass = getTableClass(tag)
312				table = tableClass(tag)
313				self.tables[tag] = table
314				if self.verbose:
315					debugmsg("decompiling '%s' table" % tag)
316				try:
317					table.decompile(data, self)
318				except "_ _ F O O _ _": # dummy exception to disable exception catching
319					print "An exception occurred during the decompilation of the '%s' table" % tag
320					from tables.DefaultTable import DefaultTable
321					import StringIO
322					file = StringIO.StringIO()
323					traceback.print_exc(file=file)
324					table = DefaultTable(tag)
325					table.ERROR = file.getvalue()
326					self.tables[tag] = table
327					table.decompile(data, self)
328				return table
329			else:
330				raise KeyError, "'%s' table not found" % tag
331
332	def __setitem__(self, tag, table):
333		self.tables[tag] = table
334
335	def __delitem__(self, tag):
336		if not self.has_key(tag):
337			raise KeyError, "'%s' table not found" % tag
338		if self.tables.has_key(tag):
339			del self.tables[tag]
340		if self.reader and self.reader.has_key(tag):
341			del self.reader[tag]
342
343	def setGlyphOrder(self, glyphOrder):
344		self.glyphOrder = glyphOrder
345
346	def getGlyphOrder(self):
347		try:
348			return self.glyphOrder
349		except AttributeError:
350			pass
351		if self.has_key('CFF '):
352			cff = self['CFF ']
353			if cff.haveGlyphNames():
354				self.glyphOrder = cff.getGlyphOrder()
355			else:
356				# CID-keyed font, use cmap
357				self._getGlyphNamesFromCmap()
358		elif self.has_key('post'):
359			# TrueType font
360			glyphOrder = self['post'].getGlyphOrder()
361			if glyphOrder is None:
362				#
363				# No names found in the 'post' table.
364				# Try to create glyph names from the unicode cmap (if available)
365				# in combination with the Adobe Glyph List (AGL).
366				#
367				self._getGlyphNamesFromCmap()
368			else:
369				self.glyphOrder = glyphOrder
370		else:
371			self._getGlyphNamesFromCmap()
372		return self.glyphOrder
373
374	def _getGlyphNamesFromCmap(self):
375		#
376		# This is rather convoluted, but then again, it's an interesting problem:
377		# - we need to use the unicode values found in the cmap table to
378		#   build glyph names (eg. because there is only a minimal post table,
379		#   or none at all).
380		# - but the cmap parser also needs glyph names to work with...
381		# So here's what we do:
382		# - make up glyph names based on glyphID
383		# - load a temporary cmap table based on those names
384		# - extract the unicode values, build the "real" glyph names
385		# - unload the temporary cmap table
386		#
387		if self.isLoaded("cmap"):
388			# Bootstrapping: we're getting called by the cmap parser
389			# itself. This means self.tables['cmap'] contains a partially
390			# loaded cmap, making it impossible to get at a unicode
391			# subtable here. We remove the partially loaded cmap and
392			# restore it later.
393			# This only happens if the cmap table is loaded before any
394			# other table that does f.getGlyphOrder()  or f.getGlyphName().
395			cmapLoading = self.tables['cmap']
396			del self.tables['cmap']
397		else:
398			cmapLoading = None
399		# Make up glyph names based on glyphID, which will be used by the
400		# temporary cmap and by the real cmap in case we don't find a unicode
401		# cmap.
402		numGlyphs = int(self['maxp'].numGlyphs)
403		glyphOrder = [None] * numGlyphs
404		glyphOrder[0] = ".notdef"
405		for i in range(1, numGlyphs):
406			glyphOrder[i] = "glyph%.5d" % i
407		# Set the glyph order, so the cmap parser has something
408		# to work with (so we don't get called recursively).
409		self.glyphOrder = glyphOrder
410		# Get a (new) temporary cmap (based on the just invented names)
411		tempcmap = self['cmap'].getcmap(3, 1)
412		if tempcmap is not None:
413			# we have a unicode cmap
414			from fontTools import agl
415			cmap = tempcmap.cmap
416			# create a reverse cmap dict
417			reversecmap = {}
418			for unicode, name in cmap.items():
419				reversecmap[name] = unicode
420			allNames = {}
421			for i in range(numGlyphs):
422				tempName = glyphOrder[i]
423				if reversecmap.has_key(tempName):
424					unicode = reversecmap[tempName]
425					if agl.UV2AGL.has_key(unicode):
426						# get name from the Adobe Glyph List
427						glyphName = agl.UV2AGL[unicode]
428					else:
429						# create uni<CODE> name
430						glyphName = "uni" + string.upper(string.zfill(
431								hex(unicode)[2:], 4))
432					tempName = glyphName
433					n = 1
434					while allNames.has_key(tempName):
435						tempName = glyphName + "#" + `n`
436						n = n + 1
437					glyphOrder[i] = tempName
438					allNames[tempName] = 1
439			# Delete the temporary cmap table from the cache, so it can
440			# be parsed again with the right names.
441			del self.tables['cmap']
442		else:
443			pass # no unicode cmap available, stick with the invented names
444		self.glyphOrder = glyphOrder
445		if cmapLoading:
446			# restore partially loaded cmap, so it can continue loading
447			# using the proper names.
448			self.tables['cmap'] = cmapLoading
449
450	def getGlyphNames(self):
451		"""Get a list of glyph names, sorted alphabetically."""
452		glyphNames = self.getGlyphOrder()[:]
453		glyphNames.sort()
454		return glyphNames
455
456	def getGlyphNames2(self):
457		"""Get a list of glyph names, sorted alphabetically,
458		but not case sensitive.
459		"""
460		from fontTools.misc import textTools
461		return textTools.caselessSort(self.getGlyphOrder())
462
463	def getGlyphName(self, glyphID):
464		try:
465			return self.getGlyphOrder()[glyphID]
466		except IndexError:
467			# XXX The ??.W8.otf font that ships with OSX uses higher glyphIDs in
468			# the cmap table than there are glyphs. I don't think it's legal...
469			return "glyph%.5d" % glyphID
470
471	def getGlyphID(self, glyphName):
472		if not hasattr(self, "_reverseGlyphOrderDict"):
473			self._buildReverseGlyphOrderDict()
474		glyphOrder = self.getGlyphOrder()
475		d = self._reverseGlyphOrderDict
476		if not d.has_key(glyphName):
477			if glyphName in glyphOrder:
478				self._buildReverseGlyphOrderDict()
479				return self.getGlyphID(glyphName)
480			else:
481				raise KeyError, glyphName
482		glyphID = d[glyphName]
483		if glyphName <> glyphOrder[glyphID]:
484			self._buildReverseGlyphOrderDict()
485			return self.getGlyphID(glyphName)
486		return glyphID
487
488	def _buildReverseGlyphOrderDict(self):
489		self._reverseGlyphOrderDict = d = {}
490		glyphOrder = self.getGlyphOrder()
491		for glyphID in range(len(glyphOrder)):
492			d[glyphOrder[glyphID]] = glyphID
493
494	def _writeTable(self, tag, writer, done):
495		"""Internal helper function for self.save(). Keeps track of
496		inter-table dependencies.
497		"""
498		if tag in done:
499			return
500		tableClass = getTableClass(tag)
501		for masterTable in tableClass.dependencies:
502			if masterTable not in done:
503				if self.has_key(masterTable):
504					self._writeTable(masterTable, writer, done)
505				else:
506					done.append(masterTable)
507		tabledata = self.getTableData(tag)
508		if self.verbose:
509			debugmsg("writing '%s' table to disk" % tag)
510		writer[tag] = tabledata
511		done.append(tag)
512
513	def getTableData(self, tag):
514		"""Returns raw table data, whether compiled or directly read from disk.
515		"""
516		if self.isLoaded(tag):
517			if self.verbose:
518				debugmsg("compiling '%s' table" % tag)
519			return self.tables[tag].compile(self)
520		elif self.reader and self.reader.has_key(tag):
521			if self.verbose:
522				debugmsg("reading '%s' table from disk" % tag)
523			return self.reader[tag]
524		else:
525			raise KeyError, tag
526
527
class GlyphOrder:

	"""A pseudo table. The glyph order isn't in the font as a separate
	table, but it's nice to present it as such in the TTX format.
	"""

	def __init__(self, tag):
		# 'tag' is accepted but not used.
		pass

	def toXML(self, writer, ttFont):
		"""Write one 'GlyphID' element per glyph of ttFont's glyph order."""
		glyphOrder = ttFont.getGlyphOrder()
		writer.comment("The 'id' attribute is only for humans; "
				"it is ignored when parsed.")
		writer.newline()
		glyphID = 0
		for glyphName in glyphOrder:
			writer.simpletag("GlyphID", id=glyphID, name=glyphName)
			writer.newline()
			glyphID = glyphID + 1

	def fromXML(self, element, ttFont):
		"""Process one parsed XML element, passed as a single
		(name, attrs, content) tuple. The name of each 'GlyphID' element
		is appended to the glyph order; other elements are ignored.
		"""
		# Unpack here rather than in the signature: tuple parameters are
		# not accepted by newer Python versions. Callers still pass the
		# element as one positional tuple.
		name, attrs, content = element
		if not hasattr(self, "glyphOrder"):
			# First element: start a new list and hand that same list to
			# the font, so that later appends show up in the font too.
			self.glyphOrder = []
			ttFont.setGlyphOrder(self.glyphOrder)
		if name == "GlyphID":
			self.glyphOrder.append(attrs["name"])
553
554
555def _test_endianness():
556	"""Test the endianness of the machine. This is crucial to know
557	since TrueType data is always big endian, even on little endian
558	machines. There are quite a few situations where we explicitly
559	need to swap some bytes.
560	"""
561	import struct
562	data = struct.pack("h", 0x01)
563	if data == "\000\001":
564		return "big"
565	elif data == "\001\000":
566		return "little"
567	else:
568		assert 0, "endian confusion!"
569
570endian = _test_endianness()
571
572
def getTableModule(tag):
	"""Fetch the packer/unpacker module for a table.
	Return None when no module is found.
	"""
	import imp
	import tables
	pyTag = tagToIdentifier(tag)
	# First only look for the module, so that "no such module" can be
	# told apart from an ImportError raised while importing it.
	try:
		found = imp.find_module(pyTag, tables.__path__)
	except ImportError:
		return None
	moduleFile = found[0]
	if moduleFile:
		moduleFile.close()
	__import__("fontTools.ttLib.tables." + pyTag)
	return getattr(tables, pyTag)
589
590
def getTableClass(tag):
	"""Fetch the packer/unpacker class for a table.
	Return the DefaultTable class when there is no module for the table.
	"""
	module = getTableModule(tag)
	if module is not None:
		# the class is named "table_" plus the identifier form of the tag
		return getattr(module, "table_" + tagToIdentifier(tag))
	from tables.DefaultTable import DefaultTable
	return DefaultTable
602
603
def newTable(tag):
	"""Return a new instance of the table class for 'tag'."""
	return getTableClass(tag)(tag)
608
609
610def _escapechar(c):
611	"""Helper function for tagToIdentifier()"""
612	import re
613	if re.match("[a-z0-9]", c):
614		return "_" + c
615	elif re.match("[A-Z]", c):
616		return c + "_"
617	else:
618		return hex(ord(c))[2:]
619
620
def tagToIdentifier(tag):
	"""Convert a table tag to a valid (but UGLY) python identifier,
	as well as a filename that's guaranteed to be unique even on a
	caseless file system. Each character is mapped to two characters.
	Lowercase letters get an underscore before the letter, uppercase
	letters get an underscore after the letter. Trailing spaces are
	trimmed. Illegal characters are escaped as two hex digits. If the
	result starts with a number (as the result of a hex escape), an
	extra underscore is prepended. The special tag 'GlyphOrder' is
	returned unchanged. Examples:
		'glyf' -> '_g_l_y_f'
		'cvt ' -> '_c_v_t'
		'OS/2' -> 'O_S_2f_2'
	"""
	import re
	if tag == "GlyphOrder":
		return tag
	assert len(tag) == 4, "tag should be 4 characters long"
	# trim trailing spaces, but always keep the first character
	end = len(tag)
	while end > 1 and tag[end - 1] == ' ':
		end = end - 1
	pieces = []
	for c in tag[:end]:
		pieces.append(_escapechar(c))
	ident = "".join(pieces)
	if re.match("[0-9]", ident):
		ident = "_" + ident
	return ident
646
647
def identifierToTag(ident):
	"""the opposite of tagToIdentifier()"""
	if ident == "GlyphOrder":
		return ident
	if len(ident) % 2 and ident[0] == "_":
		# drop the extra underscore that guards a leading digit
		ident = ident[1:]
	assert not (len(ident) % 2)
	chars = []
	for i in range(0, len(ident), 2):
		first = ident[i]
		second = ident[i+1]
		if first == "_":
			chars.append(second)
		elif second == "_":
			chars.append(first)
		else:
			# assume hex
			chars.append(chr(int(ident[i:i+2], 16)))
	tag = "".join(chars)
	# append trailing spaces
	return tag + (4 - len(tag)) * ' '
667
668
def tagToXML(tag):
	"""Similarly to tagToIdentifier(), this converts a TT tag
	to a valid XML element name. Since XML element names are
	case sensitive, this is a fairly simple/readable translation:
	a tag that already looks like a name merely loses its trailing
	spaces, 'OS/2' becomes 'OS_2', and any other tag is run through
	tagToIdentifier().
	"""
	import re
	if tag == "OS/2":
		return "OS_2"
	if tag == "GlyphOrder":
		return "GlyphOrder"
	if not re.match("[A-Za-z_][A-Za-z_0-9]* *$", tag):
		return tagToIdentifier(tag)
	return tag.strip()
683
684
def xmlToTag(tag):
	"""The opposite of tagToXML(): 'OS_2' becomes 'OS/2', a name of
	exactly 8 characters is decoded by identifierToTag(), and any other
	name is padded with spaces to 4 characters (longer names are
	returned unchanged).
	"""
	if tag == "OS_2":
		return "OS/2"
	# NOTE(review): only 8 character names are taken to be escaped
	# identifiers. tagToIdentifier() can produce other lengths (trailing
	# spaces trimmed, extra leading underscore); those are not decoded
	# here -- confirm whether such tags need to round-trip.
	if len(tag) == 8:
		return identifierToTag(tag)
	return tag + " " * (4 - len(tag))
694
695
def debugmsg(msg):
	"""Print 'msg' followed by the current local time as (HH:MM:SS)."""
	import time
	stamp = time.strftime("  (%H:%M:%S)", time.localtime(time.time()))
	print(msg + stamp)
699
700