__init__.py revision 6ab979cacaa9c15666a526e05c669b7f87bb6de9
1"""fontTools.ttLib -- a package for dealing with TrueType fonts.
2
3This package offers translators to convert TrueType fonts to Python
4objects and vice versa, and additionally from Python to TTX (an XML-based
5text format) and vice versa.
6
7Example interactive session:
8
9Python 1.5.2c1 (#43, Mar  9 1999, 13:06:43)  [CW PPC w/GUSI w/MSL]
10Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam
11>>> from fontTools import ttLib
12>>> tt = ttLib.TTFont("afont.ttf")
13>>> tt['maxp'].numGlyphs
14242
15>>> tt['OS/2'].achVendID
16'B&H\000'
17>>> tt['head'].unitsPerEm
182048
19>>> tt.saveXML("afont.ttx")
20Dumping 'LTSH' table...
21Dumping 'OS/2' table...
22Dumping 'VDMX' table...
23Dumping 'cmap' table...
24Dumping 'cvt ' table...
25Dumping 'fpgm' table...
26Dumping 'glyf' table...
27Dumping 'hdmx' table...
28Dumping 'head' table...
29Dumping 'hhea' table...
30Dumping 'hmtx' table...
31Dumping 'loca' table...
32Dumping 'maxp' table...
33Dumping 'name' table...
34Dumping 'post' table...
35Dumping 'prep' table...
36>>> tt2 = ttLib.TTFont()
37>>> tt2.importXML("afont.ttx")
38>>> tt2['maxp'].numGlyphs
39242
40>>>
41
42"""
43
44#
45# $Id: __init__.py,v 1.36 2002-07-23 16:43:55 jvr Exp $
46#
47
48import os
49import string
50import types
51
52
class TTLibError(Exception):
	"""Exception class defined for ttLib-specific errors."""
54
55
class TTFont:

	"""The main font object. It manages file input and output, and offers
	a convenient, dictionary-like way of accessing tables by their tag.
	Tables are only decompiled when necessary, i.e. when they're actually
	accessed. This means that simple operations can be extremely fast.
	"""

	def __init__(self, file=None, res_name_or_index=None,
			sfntVersion="\000\001\000\000", checkChecksums=0,
			verbose=0, recalcBBoxes=1):

		"""The constructor can be called with a few different arguments.
		When reading a font from disk, 'file' should be either a pathname
		pointing to a file, or a readable file object.

		If we're running on a Macintosh, 'res_name_or_index' may be an sfnt
		resource name or an sfnt resource index number or zero. The latter
		case will cause TTLib to autodetect whether the file is a flat file
		or a suitcase. (If it's a suitcase, only the first 'sfnt' resource
		will be read!)

		The 'checkChecksums' argument is used to specify how sfnt
		checksums are treated upon reading a file from disk:
			0: don't check (default)
			1: check, print warnings if a wrong checksum is found
			2: check, raise an exception if a wrong checksum is found.

		The TTFont constructor can also be called without a 'file'
		argument: this is the way to create a new empty font.
		In this case you can optionally supply the 'sfntVersion' argument.

		If the recalcBBoxes argument is false, a number of things will *not*
		be recalculated upon save/compile:
			1) glyph bounding boxes
			2) maxp font bounding box
			3) hhea min/max values
		(1) is needed for certain kinds of CJK fonts (ask Werner Lemberg ;-).
		Additionally, upon importing a TTX file, this option causes glyphs
		to be compiled right away. This should reduce memory consumption
		greatly, and therefore should have some impact on the time needed
		to parse/compile large fonts.
		"""

		self.verbose = verbose
		self.recalcBBoxes = recalcBBoxes
		self.tables = {}
		self.reader = None
		if not file:
			# New, empty font: there is nothing to read.
			self.sfntVersion = sfntVersion
			return
		# Only needed when there is something to read.
		from fontTools.ttLib import sfnt
		# True only for plain files opened right here, so that we know
		# we may close them again if the reader can't be created.
		openedHere = 0
		if isinstance(file, str):
			if os.name == "mac" and res_name_or_index is not None:
				# on the mac, we deal with sfnt resources as well as flat files
				import macUtils
				if res_name_or_index == 0:
					if macUtils.getSFNTResIndices(file):
						# get the first available sfnt font.
						file = macUtils.SFNTResourceReader(file, 1)
					else:
						file = open(file, "rb")
						openedHere = 1
				else:
					file = macUtils.SFNTResourceReader(file, res_name_or_index)
			else:
				file = open(file, "rb")
				openedHere = 1
		# else: assume "file" is a readable file object
		try:
			self.reader = sfnt.SFNTReader(file, checkChecksums)
		except:
			# Don't leak the file handle we opened ourselves.
			if openedHere:
				file.close()
			raise
		self.sfntVersion = self.reader.sfntVersion

	def close(self):
		"""If we still have a reader object, close it."""
		if self.reader is not None:
			self.reader.close()

	def save(self, file, makeSuitcase=0):
		"""Save the font to disk. Similarly to the constructor,
		the 'file' argument can be either a pathname or a writable
		file object.

		On the Mac, if makeSuitcase is true, a suitcase (resource fork)
		file will be made instead of a flat .ttf file.
		"""
		from fontTools.ttLib import sfnt
		if isinstance(file, str):
			# We open the stream, so the writer has to close it as well.
			closeStream = 1
			if os.name == "mac" and makeSuitcase:
				import macUtils
				file = macUtils.SFNTResourceWriter(file, self)
			else:
				file = open(file, "wb")
				if os.name == "mac":
					import macfs
					fss = macfs.FSSpec(file.name)
					fss.SetCreatorType('mdos', 'BINA')
		else:
			# assume "file" is a writable file object
			closeStream = 0

		# "GlyphOrder" is a pseudo table: it is never written to the font.
		tags = self.keys()
		tags.remove("GlyphOrder")
		numTables = len(tags)
		writer = sfnt.SFNTWriter(file, numTables, self.sfntVersion)

		done = []
		for tag in tags:
			self._writeTable(tag, writer, done)

		writer.close(closeStream)

	def saveXML(self, fileOrPath, progress=None,
			tables=None, skipTables=None, splitTables=0, disassembleInstructions=1):
		"""Export the font as TTX (an XML-based text file), or as a series of text
		files when splitTables is true. In the latter case 'fileOrPath' must be
		a path: the main file is written there and each table goes to a
		separate file next to it, named <base>.<table identifier><ext>.
		The 'tables' argument must either be false (dump all tables) or a
		list of tables to dump. The 'skipTables' argument may be a list of tables
		to skip, but only when the 'tables' argument is false.
		If 'progress' is given, its set() and setLabel() methods (and its
		optional 'idle' attribute) are used to report progress.
		"""
		from fontTools import version
		import xmlWriter

		self.disassembleInstructions = disassembleInstructions
		if not tables:
			tables = self.keys()
			if skipTables:
				for tag in skipTables:
					if tag in tables:
						tables.remove(tag)
		numTables = len(tables)
		if progress:
			progress.set(0, numTables)
			idlefunc = getattr(progress, "idle", None)
		else:
			idlefunc = None

		writer = xmlWriter.XMLWriter(fileOrPath, idlefunc=idlefunc)
		# repr() minus the surrounding quotes gives an escaped, printable
		# form of the (binary) sfnt version string.
		writer.begintag("ttFont", sfntVersion=repr(self.sfntVersion)[1:-1],
				ttLibVersion=version)
		writer.newline()

		if not splitTables:
			writer.newline()
		else:
			# 'fileOrPath' must now be a path
			path, ext = os.path.splitext(fileOrPath)
			fileNameTemplate = path + ".%s" + ext

		for i in range(numTables):
			if progress:
				progress.set(i)
			tag = tables[i]
			if splitTables:
				tablePath = fileNameTemplate % tagToIdentifier(tag)
				tableWriter = xmlWriter.XMLWriter(tablePath, idlefunc=idlefunc)
				tableWriter.begintag("ttFont", ttLibVersion=version)
				tableWriter.newline()
				tableWriter.newline()
				# The main file only gets a reference to the table file.
				writer.simpletag(tagToXML(tag), src=os.path.basename(tablePath))
				writer.newline()
			else:
				tableWriter = writer
			self._tableToXML(tableWriter, tag, progress)
			if splitTables:
				tableWriter.endtag("ttFont")
				tableWriter.newline()
				tableWriter.close()
		if progress:
			# Use numTables rather than the loop variable, which does not
			# exist when there was nothing to dump.
			progress.set(numTables)
		writer.endtag("ttFont")
		writer.newline()
		writer.close()
		if self.verbose:
			debugmsg("Done dumping TTX")

	def _tableToXML(self, writer, tag, progress):
		"""Internal helper function for self.saveXML(): write a single
		table to 'writer' and report what is being done. Tables that
		are not present in the font are reported and skipped.
		"""
		found = self.has_key(tag)
		if found:
			table = self[tag]
			report = "Dumping '%s' table..." % tag
		else:
			report = "No '%s' table found." % tag
		if progress:
			progress.setLabel(report)
		elif self.verbose:
			debugmsg(report)
		else:
			print(report)
		if not found:
			return
		xmlTag = tagToXML(tag)
		if hasattr(table, "ERROR"):
			writer.begintag(xmlTag, ERROR="decompilation error")
		else:
			writer.begintag(xmlTag)
		writer.newline()
		if tag in ("glyf", "CFF "):
			# These tables get the progress object passed as well.
			table.toXML(writer, self, progress)
		else:
			table.toXML(writer, self)
		writer.endtag(xmlTag)
		writer.newline()
		writer.newline()

	def importXML(self, file, progress=None):
		"""Import a TTX file (an XML-based text format), so as to recreate
		a font object.
		"""
		if self.has_key("maxp") and self.has_key("post"):
			# Make sure the glyph order is loaded, as it otherwise gets
			# lost if the XML doesn't contain the glyph order, yet does
			# contain the table which was originally used to extract the
			# glyph names from (ie. 'post', 'cmap' or 'CFF ').
			self.getGlyphOrder()
		import xmlImport
		xmlImport.importXML(self, file, progress)

	def isLoaded(self, tag):
		"""Return true if the table identified by 'tag' has been
		decompiled and loaded into memory."""
		return tag in self.tables

	def has_key(self, tag):
		"""Return 1 if the font has a table 'tag', either loaded or still
		on disk, and 0 otherwise. The "GlyphOrder" pseudo table is always
		present.
		"""
		if self.isLoaded(tag):
			return 1
		elif self.reader and self.reader.has_key(tag):
			return 1
		elif tag == "GlyphOrder":
			return 1
		else:
			return 0

	def keys(self):
		"""Return a new list with the tags of all tables, loaded or not:
		"GlyphOrder" first, followed by the other tags in sorted order.
		"""
		keys = list(self.tables.keys())
		if self.reader:
			for key in self.reader.keys():
				if key not in keys:
					keys.append(key)
		keys.sort()
		if "GlyphOrder" in keys:
			keys.remove("GlyphOrder")
		return ["GlyphOrder"] + keys

	def __len__(self):
		"""Return the number of tables, the "GlyphOrder" pseudo table
		included."""
		return len(self.keys())

	def __getitem__(self, tag):
		"""Return the table object for 'tag', reading and decompiling it
		first if that hasn't been done yet. Raise KeyError if the table
		is not loaded and there is no reader to get it from; errors
		raised while reading or decompiling are passed on to the caller.
		"""
		try:
			return self.tables[tag]
		except KeyError:
			pass
		if tag == "GlyphOrder":
			table = GlyphOrder(tag)
			self.tables[tag] = table
			return table
		if self.reader is None:
			raise KeyError("'%s' table not found" % tag)
		if self.verbose:
			debugmsg("Reading '%s' table from disk" % tag)
		data = self.reader[tag]
		tableClass = getTableClass(tag)
		table = tableClass(tag)
		# Register the table *before* decompiling it: decompile() may call
		# back into the font (see _getGlyphNamesFromCmap()).
		self.tables[tag] = table
		if self.verbose:
			debugmsg("Decompiling '%s' table" % tag)
		try:
			table.decompile(data, self)
		except:
			# Don't leave a half decompiled table behind: a later access
			# would silently return it as if nothing had gone wrong.
			if self.tables.get(tag) is table:
				del self.tables[tag]
			raise
		return table

	def __setitem__(self, tag, table):
		"""Store 'table' as the loaded table for 'tag'."""
		self.tables[tag] = table

	def __delitem__(self, tag):
		"""Remove the table 'tag' from both the loaded tables and the
		reader. Raise KeyError if the font has no such table.
		"""
		if not self.has_key(tag):
			raise KeyError("'%s' table not found" % tag)
		if tag in self.tables:
			del self.tables[tag]
		if self.reader and self.reader.has_key(tag):
			del self.reader[tag]

	def setGlyphOrder(self, glyphOrder):
		"""Set the glyph order: a list of glyph names, indexed by glyph ID."""
		self.glyphOrder = glyphOrder

	def getGlyphOrder(self):
		"""Return the glyph order, a list of glyph names indexed by glyph
		ID. If it hasn't been set yet, it is taken from the 'CFF ' or
		'post' table, or made up with the help of the 'cmap' table.
		"""
		try:
			return self.glyphOrder
		except AttributeError:
			pass
		if self.has_key('CFF '):
			cff = self['CFF ']
			if cff.haveGlyphNames():
				self.glyphOrder = cff.getGlyphOrder()
			else:
				# CID-keyed font, use cmap
				self._getGlyphNamesFromCmap()
		elif self.has_key('post'):
			# TrueType font
			glyphOrder = self['post'].getGlyphOrder()
			if glyphOrder is None:
				#
				# No names found in the 'post' table.
				# Try to create glyph names from the unicode cmap (if available)
				# in combination with the Adobe Glyph List (AGL).
				#
				self._getGlyphNamesFromCmap()
			else:
				self.glyphOrder = glyphOrder
		else:
			self._getGlyphNamesFromCmap()
		return self.glyphOrder

	def _getGlyphNamesFromCmap(self):
		"""Build self.glyphOrder from the unicode values in the 'cmap'
		table, falling back to made up names ("glyph00001" etc.) for
		glyphs that can't be named that way.
		"""
		#
		# This is rather convoluted, but then again, it's an interesting problem:
		# - we need to use the unicode values found in the cmap table to
		#   build glyph names (eg. because there is only a minimal post table,
		#   or none at all).
		# - but the cmap parser also needs glyph names to work with...
		# So here's what we do:
		# - make up glyph names based on glyphID
		# - load a temporary cmap table based on those names
		# - extract the unicode values, build the "real" glyph names
		# - unload the temporary cmap table
		#
		if self.isLoaded("cmap"):
			# Bootstrapping: we're getting called by the cmap parser
			# itself. This means self.tables['cmap'] contains a partially
			# loaded cmap, making it impossible to get at a unicode
			# subtable here. We remove the partially loaded cmap and
			# restore it later.
			# This only happens if the cmap table is loaded before any
			# other table that does f.getGlyphOrder()  or f.getGlyphName().
			cmapLoading = self.tables['cmap']
			del self.tables['cmap']
		else:
			cmapLoading = None
		# Make up glyph names based on glyphID, which will be used by the
		# temporary cmap and by the real cmap in case we don't find a unicode
		# cmap.
		numGlyphs = int(self['maxp'].numGlyphs)
		glyphOrder = ["glyph%.5d" % i for i in range(numGlyphs)]
		if glyphOrder:
			# (guarded, so a font without any glyphs doesn't blow up here)
			glyphOrder[0] = ".notdef"
		# Set the glyph order, so the cmap parser has something
		# to work with (so we don't get called recursively).
		self.glyphOrder = glyphOrder
		# Get a (new) temporary cmap (based on the just invented names)
		tempcmap = self['cmap'].getcmap(3, 1)
		if tempcmap is not None:
			# we have a unicode cmap
			from fontTools import agl
			cmap = tempcmap.cmap
			# create a reverse cmap dict
			reversecmap = {}
			for codepoint, name in cmap.items():
				reversecmap[name] = codepoint
			allNames = {}
			for i in range(numGlyphs):
				tempName = glyphOrder[i]
				if tempName in reversecmap:
					codepoint = reversecmap[tempName]
					if agl.UV2AGL.has_key(codepoint):
						# get name from the Adobe Glyph List
						glyphName = agl.UV2AGL[codepoint]
					else:
						# create uni<CODE> name
						glyphName = "uni%04X" % codepoint
					# Make the name unique by appending "#<n>" if needed.
					tempName = glyphName
					n = 1
					while tempName in allNames:
						tempName = glyphName + "#" + repr(n)
						n = n + 1
					glyphOrder[i] = tempName
					allNames[tempName] = 1
			# Delete the temporary cmap table from the cache, so it can
			# be parsed again with the right names.
			del self.tables['cmap']
		else:
			pass # no unicode cmap available, stick with the invented names
		self.glyphOrder = glyphOrder
		if cmapLoading is not None:
			# restore partially loaded cmap, so it can continue loading
			# using the proper names.
			self.tables['cmap'] = cmapLoading

	def getGlyphNames(self):
		"""Get a list of glyph names, sorted alphabetically."""
		glyphNames = self.getGlyphOrder()[:]
		glyphNames.sort()
		return glyphNames

	def getGlyphNames2(self):
		"""Get a list of glyph names, sorted alphabetically,
		but not case sensitive.
		"""
		from fontTools.misc import textTools
		return textTools.caselessSort(self.getGlyphOrder())

	def getGlyphName(self, glyphID):
		"""Return the name of the glyph with index 'glyphID'. A name is
		made up for glyph IDs beyond the end of the glyph order.
		"""
		try:
			return self.getGlyphOrder()[glyphID]
		except IndexError:
			# XXX The ??.W8.otf font that ships with OSX uses higher glyphIDs in
			# the cmap table than there are glyphs. I don't think it's legal...
			return "glyph%.5d" % glyphID

	def getGlyphID(self, glyphName):
		"""Return the glyph ID (the index in the glyph order) for
		'glyphName'. Raise KeyError if there is no glyph by that name.
		"""
		if not hasattr(self, "_reverseGlyphOrderDict"):
			self._buildReverseGlyphOrderDict()
		glyphOrder = self.getGlyphOrder()
		d = self._reverseGlyphOrderDict
		if glyphName not in d:
			if glyphName in glyphOrder:
				# The glyph order has changed since the reverse dict
				# was built: rebuild it and try again.
				self._buildReverseGlyphOrderDict()
				return self.getGlyphID(glyphName)
			else:
				raise KeyError(glyphName)
		glyphID = d[glyphName]
		if glyphName != glyphOrder[glyphID]:
			# Stale entry: the glyph order has changed, rebuild and retry.
			self._buildReverseGlyphOrderDict()
			return self.getGlyphID(glyphName)
		return glyphID

	def _buildReverseGlyphOrderDict(self):
		"""(Re)build the glyph name -> glyph ID dict used by
		self.getGlyphID()."""
		self._reverseGlyphOrderDict = d = {}
		glyphOrder = self.getGlyphOrder()
		for glyphID in range(len(glyphOrder)):
			d[glyphOrder[glyphID]] = glyphID

	def _writeTable(self, tag, writer, done):
		"""Internal helper function for self.save(). Keeps track of
		inter-table dependencies: the tables listed in the 'dependencies'
		attribute of the table class are written before the table itself.
		'done' is the list of tags that have been handled already.
		"""
		if tag in done:
			return
		tableClass = getTableClass(tag)
		for masterTable in tableClass.dependencies:
			if masterTable not in done:
				if self.has_key(masterTable):
					self._writeTable(masterTable, writer, done)
				else:
					# The font doesn't have this table: nothing to wait for.
					done.append(masterTable)
		tabledata = self.getTableData(tag)
		if self.verbose:
			debugmsg("writing '%s' table to disk" % tag)
		writer[tag] = tabledata
		done.append(tag)

	def getTableData(self, tag):
		"""Returns raw table data, whether compiled or directly read from disk.
		Raise KeyError if the font has no table 'tag'.
		"""
		if self.isLoaded(tag):
			if self.verbose:
				debugmsg("compiling '%s' table" % tag)
			return self.tables[tag].compile(self)
		elif self.reader and self.reader.has_key(tag):
			if self.verbose:
				debugmsg("Reading '%s' table from disk" % tag)
			return self.reader[tag]
		else:
			raise KeyError(tag)
531
532
533class GlyphOrder:
534
535	"""A pseudo table. The glyph order isn't in the font as a separate
536	table, but it's nice to present it as such in the TTX format.
537	"""
538
539	def __init__(self, tag):
540		pass
541
542	def toXML(self, writer, ttFont):
543		glyphOrder = ttFont.getGlyphOrder()
544		writer.comment("The 'id' attribute is only for humans; "
545				"it is ignored when parsed.")
546		writer.newline()
547		for i in range(len(glyphOrder)):
548			glyphName = glyphOrder[i]
549			writer.simpletag("GlyphID", id=i, name=glyphName)
550			writer.newline()
551
552	def fromXML(self, (name, attrs, content), ttFont):
553		if not hasattr(self, "glyphOrder"):
554			self.glyphOrder = []
555			ttFont.setGlyphOrder(self.glyphOrder)
556		if name == "GlyphID":
557			self.glyphOrder.append(attrs["name"])
558
559
560def _test_endianness():
561	"""Test the endianness of the machine. This is crucial to know
562	since TrueType data is always big endian, even on little endian
563	machines. There are quite a few situations where we explicitly
564	need to swap some bytes.
565	"""
566	import struct
567	data = struct.pack("h", 0x01)
568	if data == "\000\001":
569		return "big"
570	elif data == "\001\000":
571		return "little"
572	else:
573		assert 0, "endian confusion!"
574
575endian = _test_endianness()
576
577
def getTableModule(tag):
	"""Fetch the packer/unpacker module for a table.
	Return None when no module is found.
	"""
	from fontTools.ttLib import tables
	pyTag = tagToIdentifier(tag)
	try:
		# For a dotted name __import__() returns the top-level package,
		# so it is only called for its side effect of loading the table
		# module; the module itself is picked up from the 'tables'
		# package below.
		__import__("fontTools.ttLib.tables." + pyTag)
	except ImportError:
		# NOTE: this also covers an ImportError raised *inside* an
		# existing table module.
		return None
	return getattr(tables, pyTag)
590
591
def getTableClass(tag):
	"""Fetch the packer/unpacker class for a table.
	Return the generic DefaultTable class when there is no module
	for this particular table.
	"""
	module = getTableModule(tag)
	if module is None:
		from fontTools.ttLib.tables.DefaultTable import DefaultTable
		return DefaultTable
	# The class is named after the module: "table_" + <module name>.
	return getattr(module, "table_" + tagToIdentifier(tag))
603
604
def newTable(tag):
	"""Create and return a fresh table object for 'tag', using the
	class that getTableClass() selects for it."""
	return getTableClass(tag)(tag)
609
610
611def _escapechar(c):
612	"""Helper function for tagToIdentifier()"""
613	import re
614	if re.match("[a-z0-9]", c):
615		return "_" + c
616	elif re.match("[A-Z]", c):
617		return c + "_"
618	else:
619		return hex(ord(c))[2:]
620
621
def tagToIdentifier(tag):
	"""Turn a table tag into a valid (but UGLY) python identifier that
	can also serve as a filename, unique even on a caseless file system.

	Trailing spaces are dropped first (but at least one character is
	always kept). Every remaining character is then mapped to two
	characters by _escapechar(): lowercase letters get an underscore
	before the letter, uppercase letters get an underscore after the
	letter, and illegal characters are escaped as hex digits. If the
	result starts with a digit (as the result of a hex escape), an
	extra underscore is prepended. The "GlyphOrder" pseudo tag is
	returned unchanged. Examples:
		'glyf' -> '_g_l_y_f'
		'cvt ' -> '_c_v_t'
		'OS/2' -> 'O_S_2f_2'
	"""
	if tag == "GlyphOrder":
		return tag
	assert len(tag) == 4, "tag should be 4 characters long"
	# An all-space tag would be stripped down to nothing: fall back to
	# its first character in that case.
	trimmed = tag.rstrip(" ") or tag[:1]
	ident = "".join([_escapechar(ch) for ch in trimmed])
	if "0" <= ident[0] <= "9":
		ident = "_" + ident
	return ident
647
648
def identifierToTag(ident):
	"""The opposite of tagToIdentifier(): convert an identifier back to
	a table tag, padded with spaces to four characters. "GlyphOrder" is
	returned unchanged.
	"""
	if ident == "GlyphOrder":
		return ident
	if len(ident) % 2 and ident[0] == "_":
		# Drop the underscore that tagToIdentifier() prepends when the
		# identifier would otherwise start with a digit.
		ident = ident[1:]
	assert not (len(ident) % 2), "identifier should have an even number of characters"
	tag = ""
	# Each tag character was encoded as a pair of characters.
	for i in range(0, len(ident), 2):
		first = ident[i]
		second = ident[i+1]
		if first == "_":
			# "_x": lowercase letter or digit
			tag = tag + second
		elif second == "_":
			# "X_": uppercase letter
			tag = tag + first
		else:
			# assume hex
			tag = tag + chr(int(first + second, 16))
	# append trailing spaces
	tag = tag + (4 - len(tag)) * ' '
	return tag
668
669
def tagToXML(tag):
	"""Similarly to tagToIdentifier(), this converts a TT tag
	to a valid XML element name. Since XML element names are
	case sensitive, this is a fairly simple/readable translation:
	'OS/2' becomes 'OS_2', tags that already are valid names merely
	lose their trailing spaces, and anything else is converted with
	tagToIdentifier().
	"""
	import re
	if tag == "OS/2":
		return "OS_2"
	elif tag == "GlyphOrder":
		return "GlyphOrder"
	if re.match("[A-Za-z_][A-Za-z_0-9]* *$", tag):
		# The pattern only allows spaces at the end of the tag.
		return tag.strip()
	else:
		return tagToIdentifier(tag)
684
685
def xmlToTag(tag):
	"""The opposite of tagToXML(): convert an XML element name back to
	a table tag.

	NOTE: only names of exactly 8 characters are decoded with
	identifierToTag(). Identifiers of any other length (which
	tagToIdentifier() produces for tags with trailing spaces, or when it
	prepends an underscore) are returned as they are.
	"""
	if tag == "OS_2":
		return "OS/2"
	if len(tag) == 8:
		return identifierToTag(tag)
	# Pad short names to four characters; for longer names the
	# multiplier is negative, so nothing gets added.
	return tag + " " * (4 - len(tag))
695
696
def debugmsg(msg):
	"""Print 'msg', followed by the current local time as (HH:MM:SS)."""
	import time
	timestamp = time.strftime("  (%H:%M:%S)", time.localtime(time.time()))
	print(msg + timestamp)
700
701