# psLib.py revision 528614e6e254dfe3c501ff440c291c6c55de5e6f
1import StringIO
2import regex
3import string
4from fontTools.misc import eexec
5import types
6from psOperators import *
7
8
ps_special = '()<>[]{}%'	# / is one too, but we take care of that one differently

whitespace = string.whitespace
# NOTE: these patterns use the old "regex" module, whose match() returns
# the *length* of the match (-1 on failure), not a match object, and whose
# pattern syntax is Emacs-style: \( \) group, \| alternates, bare parens
# are literal characters.
skipwhiteRE = regex.compile("[%s]*" % whitespace)

# a run of characters up to the next delimiter, comment char or whitespace
endofthingPat = "[^][(){}<>/%s%s]*" % ('%', whitespace)
endofthingRE = regex.compile(endofthingPat)

# a comment runs from '%' to the end of the line
commentRE = regex.compile("%[^\n\r]*")

# XXX This not entirely correct:
# matches the contents of a (...) PostScript string, allowing escaped
# parens and one level of nested (literal) parens
stringPat = """
	(
		\(
			\(
				[^()]*   \\\\   [()]
			\)
			\|
			\(
				[^()]*  (   [^()]*  )
			\)
		\)*
		[^()]*
	)
"""
stringPat = string.join(string.split(stringPat), '')	# strip all whitespace from the pattern
stringRE = regex.compile(stringPat)

# <hex digits, possibly with embedded whitespace>
hexstringRE = regex.compile("<[%s0-9A-Fa-f]*>" % whitespace)

# old-style string exceptions
ps_tokenerror = 'ps_tokenerror'
ps_error = 'ps_error'
41
class PSTokenizer(StringIO.StringIO):

	"""Scanner for PostScript source.  Subclasses StringIO and pokes at
	its internals (self.buf, self.pos, self.len, self.buflist) directly,
	for speed.
	"""

	def getnexttoken(self,
			# localize some stuff, for performance
			len = len,
			ps_special = ps_special,
			stringmatch = stringRE.match,
			hexstringmatch = hexstringRE.match,
			commentmatch = commentRE.match,
			endmatch = endofthingRE.match,
			whitematch = skipwhiteRE.match):
		"""Return a (tokentype, token) pair.

		tokentype is the name of the interpreter handler method for the
		token ('do_special', 'do_comment', 'do_string', 'do_hexstring',
		'do_literal'), or '' for plain tokens (numbers and executable
		names); token is the raw token text.  Returns (None, None) at
		the end of the buffer.  Note that the old regex module's match()
		returns the length of the match, or a value < 0 on failure.
		"""
		# skip leading whitespace
		self.pos = self.pos + whitematch(self.buf, self.pos)
		if self.pos >= self.len:
			return None, None
		pos = self.pos
		buf = self.buf
		char = buf[pos]
		if char in ps_special:
			if char in '{}[]':
				tokentype = 'do_special'
				token = char
			elif char == '%':
				tokentype = 'do_comment'
				commentlen = commentmatch(buf, pos)
				token = buf[pos:pos+commentlen]
			elif char == '(':
				tokentype = 'do_string'
				strlen = stringmatch(buf, pos)
				if strlen < 0:
					raise ps_tokenerror, 'bad string at character %d' % pos
				token = buf[pos:pos+strlen]
			elif char == '<':
				tokentype = 'do_hexstring'
				strlen = hexstringmatch(buf, pos)
				if strlen < 0:
					raise ps_tokenerror, 'bad hexstring at character %d' % pos
				token = buf[pos:pos+strlen]
			else:
				raise ps_tokenerror, 'bad token at character %d' % pos
		else:
			if char == '/':
				# literal name: scan from just past the slash, then
				# include the slash in the token length
				tokentype = 'do_literal'
				endofthing = endmatch(buf, pos + 1) + 1
			else:
				# plain token: number or executable name
				tokentype = ''
				endofthing = endmatch(buf, pos)
			if endofthing <= 0:
				raise ps_tokenerror, 'bad token at character %d' % pos
			token = buf[pos:pos + endofthing]
		self.pos = pos + len(token)
		return tokentype, token

	def skipwhite(self, whitematch = skipwhiteRE.match):
		"""Advance the read position past any whitespace."""
		self.pos = self.pos + whitematch(self.buf, self.pos)

	def starteexec(self):
		"""Switch to the eexec-encrypted portion of the buffer: decrypt
		the remainder and continue tokenizing the plaintext.
		"""
		self.pos = self.pos + 1
		#self.skipwhite()
		self.dirtybuf = self.buf[self.pos:]	# keep the encrypted original around
		# 55665 is the standard eexec key (Type 1 spec); the second
		# return value (final decryption state) is not needed here
		self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
		self.len = len(self.buf)
		self.pos = 4	# per the Type 1 spec, the first 4 plaintext bytes are random padding

	def stopeexec(self):
		"""Switch back to the unencrypted buffer saved by starteexec()."""
		if not hasattr(self, 'dirtybuf'):
			return
		self.buf = self.dirtybuf
		del self.dirtybuf

	def flush(self):
		# override StringIO.flush: fold the internal write buffer
		# (buflist) into self.buf so direct buf access sees all data
		if self.buflist:
			self.buf = self.buf + string.join(self.buflist, '')
			self.buflist = []
116
117
class PSInterpreter(PSOperators):

	"""A minimal PostScript interpreter, just capable enough to parse
	Type 1 fonts.  The operator implementations are inherited from
	PSOperators; this class provides the scan/dispatch loop, the operand
	and dictionary stacks, and procedure/array construction.
	"""

	def __init__(self):
		systemdict = {}
		userdict = {}
		self.dictstack = [systemdict, userdict]	# systemdict at the bottom
		self.stack = []		# operand stack
		self.proclevel = 0	# current { } procedure nesting depth
		self.procmark = ps_procmark()
		self.fillsystemdict()

	def fillsystemdict(self):
		"""Populate systemdict with the built-in objects and all ps_* operators."""
		systemdict = self.dictstack[0]
		systemdict['['] = systemdict['mark'] = self.mark = ps_mark()
		systemdict[']'] = ps_operator(']', self.do_makearray)
		systemdict['true'] = ps_boolean(1)
		systemdict['false'] = ps_boolean(0)
		systemdict['StandardEncoding'] = ps_array(ps_StandardEncoding)
		systemdict['FontDirectory'] = ps_dict({})
		self.suckoperators(systemdict, self.__class__)

	def suckoperators(self, systemdict, klass):
		"""Register every method of klass (and, recursively, its base
		classes) whose name starts with 'ps_' as an operator, under the
		name minus that prefix.
		"""
		for name in dir(klass):
			attr = getattr(self, name)
			if callable(attr) and name[:3] == 'ps_':
				name = name[3:]
				systemdict[name] = ps_operator(name, attr)
		for baseclass in klass.__bases__:
			self.suckoperators(systemdict, baseclass)

	def interpret(self, data, getattr = getattr):
		"""Tokenize and execute 'data' as a PostScript program."""
		tokenizer = self.tokenizer = PSTokenizer(data)
		# localize some names, for performance
		getnexttoken = tokenizer.getnexttoken
		do_token = self.do_token
		handle_object = self.handle_object
		try:
			while 1:
				tokentype, token = getnexttoken()
				#print token
				if not token:
					break
				if tokentype:
					# the tokenizer classified the token: dispatch to
					# the matching do_* handler method by name
					handler = getattr(self, tokentype)
					object = handler(token)
				else:
					object = do_token(token)
				if object is not None:
					handle_object(object)
			tokenizer.close()
			self.tokenizer = None
		finally:
			# if self.tokenizer wasn't reset, an exception occurred:
			# dump the source text surrounding the error position
			if self.tokenizer is not None:
				print 'ps error:\n- - - - - - -'
				print self.tokenizer.buf[self.tokenizer.pos-50:self.tokenizer.pos]
				print '>>>'
				print self.tokenizer.buf[self.tokenizer.pos:self.tokenizer.pos+50]
				print '- - - - - - -'

	def handle_object(self, object):
		"""Push or execute 'object', depending on its type and on whether
		a procedure body is currently being constructed.
		"""
		if not (self.proclevel or object.literal or object.type == 'proceduretype'):
			# executable object outside a procedure body: execute it
			if object.type <> 'operatortype':
				object = self.resolve_name(object.value)
			if object.literal:
				self.push(object)
			else:
				if object.type == 'proceduretype':
					self.call_procedure(object)
				else:
					object.function()
		else:
			self.push(object)

	def call_procedure(self, proc):
		"""Execute each object in the procedure's body, in order."""
		handle_object = self.handle_object
		for item in proc.value:
			handle_object(item)

	def resolve_name(self, name):
		"""Look up 'name' in the dictionary stack, topmost dict first."""
		dictstack = self.dictstack
		for i in range(len(dictstack)-1, -1, -1):
			if dictstack[i].has_key(name):
				return dictstack[i][name]
		raise ps_error, 'name error: ' + str(name)

	def do_token(self, token,
				atoi = string.atoi,
				atof = string.atof,
				ps_name = ps_name,
				ps_integer = ps_integer,
				ps_real = ps_real):
		"""Convert an unclassified token to a ps_integer, ps_real, radix
		integer (base#digits notation) or executable ps_name object.
		"""
		try:
			num = atoi(token)
		except (ValueError, OverflowError):
			try:
				num = atof(token)
			except (ValueError, OverflowError):
				if '#' in token:
					# radix number: <base>#<digits>
					hashpos = string.find(token, '#')
					try:
						base = string.atoi(token[:hashpos])
						num = string.atoi(token[hashpos+1:], base)
					except (ValueError, OverflowError):
						return ps_name(token)
					else:
						return ps_integer(num)
				else:
					return ps_name(token)
			else:
				return ps_real(num)
		else:
			return ps_integer(num)

	def do_comment(self, token):
		# comments are simply ignored
		pass

	def do_literal(self, token):
		# strip the leading slash
		return ps_literal(token[1:])

	def do_string(self, token):
		# strip the enclosing parentheses
		return ps_string(token[1:-1])

	def do_hexstring(self, token):
		"""Decode a <...> hex string token into a ps_string."""
		hexStr = string.join(string.split(token[1:-1]), '')	# remove embedded whitespace
		if len(hexStr) % 2:
			# odd number of digits: pad with a trailing zero
			hexStr = hexStr + '0'
		cleanstr = []
		for i in range(0, len(hexStr), 2):
			cleanstr.append(chr(string.atoi(hexStr[i:i+2], 16)))
		cleanstr = string.join(cleanstr, '')
		return ps_string(cleanstr)

	def do_special(self, token):
		"""Handle the special one-character tokens: {, }, [ and ]."""
		if token == '{':
			self.proclevel = self.proclevel + 1
			return self.procmark
		elif token == '}':
			# collect everything down to the matching procmark
			proc = []
			while 1:
				topobject = self.pop()
				if topobject == self.procmark:
					break
				proc.append(topobject)
			self.proclevel = self.proclevel - 1
			proc.reverse()
			return ps_procedure(proc)
		elif token == '[':
			return self.mark
		elif token == ']':
			# ']' is executed via name lookup, reaching do_makearray
			# (see fillsystemdict)
			return ps_name(']')
		else:
			raise ps_tokenerror, 'huh?'

	def push(self, object):
		"""Push 'object' onto the operand stack."""
		self.stack.append(object)

	def pop(self, *types):
		"""Pop and return the top of the operand stack.  If 'types' are
		given, the object's type must be one of them.
		"""
		stack = self.stack
		if not stack:
			raise ps_error, 'stack underflow'
		object = stack[-1]
		if types:
			if object.type not in types:
				raise ps_error, 'typecheck, expected %s, found %s' % (`types`, object.type)
		del stack[-1]
		return object

	def do_makearray(self):
		"""Implementation of the ']' operator: collect objects down to
		the matching mark into a ps_array.
		"""
		array = []
		while 1:
			topobject = self.pop()
			if topobject == self.mark:
				break
			array.append(topobject)
		array.reverse()
		self.push(ps_array(array))

	def close(self):
		"""Remove circular references."""
		del self.stack
		del self.dictstack
298
299
def unpack_item(item):
	"""Recursively convert a wrapped PS object into plain Python values.

	Dictionaries and arrays are unpacked element by element; procedure
	arrays become tuples; any other value is returned as-is.
	"""
	tp = type(item.value)
	if tp == types.DictionaryType:
		plain = {}
		for key, value in item.value.items():
			plain[key] = unpack_item(value)
	elif tp == types.ListType:
		plain = []
		for element in item.value:
			plain.append(unpack_item(element))
		if item.type == 'proceduretype':
			# distinguish procedures from ordinary arrays
			plain = tuple(plain)
	else:
		plain = item.value
	return plain
315
def suckfont(data):
	"""Interpret the Type 1 program in 'data' and return its font
	dictionary as plain Python objects (see unpack_item).
	"""
	import re
	match = re.search(r"/FontName\s+/([^ \t\n\r]+)\s+def", data)
	if match:
		fontName = match.group(1)
	else:
		fontName = None
	interpreter = PSInterpreter()
	# NOTE(review): presumably this predefines a dummy Helvetica so the
	# font program can reference an existing font -- confirm
	interpreter.interpret("/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop")
	interpreter.interpret(data)
	fontdir = interpreter.dictstack[0]['FontDirectory'].value
	if not fontdir.has_key(fontName):
		# fall back, in case fontName wasn't found
		names = fontdir.keys()
		if len(names) > 1:
			names.remove("Helvetica")
		names.sort()
		fontName = names[0]
	rawfont = fontdir[fontName]
	interpreter.close()
	return unpack_item(rawfont)
338
339
if __name__ == "__main__":
	# Interactive test driver; classic MacOS only (macfs file dialog).
	import macfs
	fss, ok = macfs.StandardGetFile("LWFN")
	if ok:
		import t1Lib
		data, kind = t1Lib.read(fss.as_pathname())
		font = suckfont(data)
347