1#!/usr/bin/env python
2##########################################################################
3#
4# Copyright 2008 VMware, Inc.
5# All Rights Reserved.
6#
7# Permission is hereby granted, free of charge, to any person obtaining a
8# copy of this software and associated documentation files (the
9# "Software"), to deal in the Software without restriction, including
10# without limitation the rights to use, copy, modify, merge, publish,
11# distribute, sub license, and/or sell copies of the Software, and to
12# permit persons to whom the Software is furnished to do so, subject to
13# the following conditions:
14#
15# The above copyright notice and this permission notice (including the
16# next paragraph) shall be included in all copies or substantial portions
17# of the Software.
18#
19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26#
27##########################################################################
28
29
30import sys
31import xml.parsers.expat
32import optparse
33
34from model import *
35
36
# Kinds of token handed out by the tokenizer.  The values are consecutive
# small integers and are only ever compared for equality.
ELEMENT_START = 0
ELEMENT_END = 1
CHARACTER_DATA = 2
EOF = 3
38
39
class XmlToken:
    """A single lexical unit of an XML document.

    type         -- one of ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF
    name_or_data -- the element name for start/end tokens, the text for
                    character data tokens
    attrs        -- attributes of a start element, otherwise None
    line, column -- position of the token in the input, when known
    """

    def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
        assert type in (ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF)
        self.type, self.name_or_data = type, name_or_data
        self.attrs = attrs
        self.line, self.column = line, column

    def __str__(self):
        """Return a short human readable rendering, as used in diagnostics."""
        kind = self.type
        payload = self.name_or_data
        if kind == ELEMENT_START:
            return ''.join(('<', payload, ' ...>'))
        elif kind == ELEMENT_END:
            return ''.join(('</', payload, '>'))
        elif kind == CHARACTER_DATA:
            return payload
        elif kind == EOF:
            return 'end of file'
        # Only reachable if self.type was changed to an unknown value after
        # construction.
        assert 0
60
61
class XmlTokenizer:
    """Expat based XML tokenizer.

    The input stream is fed to an expat parser one chunk at a time.  The
    parser callbacks turn what they see into XmlToken objects, which are
    queued in self.tokens and handed out one by one by next().

    fp      -- object with a read(size) method returning the document text
    skip_ws -- when true, character data consisting solely of whitespace
               produces no token
    """

    # Amount of data requested from the stream per read() call.
    _CHUNK_SIZE = 16*1024

    # Numeric expat error code tolerated by next().  The code this replaces
    # carried a commented-out comparison against XML_ERROR_NO_ELEMENTS, so 3
    # is taken to be that error ("no element found").
    _XML_ERROR_NO_ELEMENTS = 3

    def __init__(self, fp, skip_ws = True):
        self.fp = fp
        self.tokens = []       # tokens produced by the chunk parsed last
        self.index = 0         # index in self.tokens of the next token
        self.final = False     # set once the last chunk has been parsed
        self.skip_ws = skip_ws

        # Text of the current run of character data and the position at
        # which that run started.
        self.character_pos = 0, 0
        self.character_data = ''

        self.parser = xml.parsers.expat.ParserCreate()
        self.parser.StartElementHandler  = self.handle_element_start
        self.parser.EndElementHandler    = self.handle_element_end
        self.parser.CharacterDataHandler = self.handle_character_data

    def handle_element_start(self, name, attributes):
        """Expat callback: queue an ELEMENT_START token."""
        self.finish_character_data()
        line, column = self.pos()
        token = XmlToken(ELEMENT_START, name, attributes, line, column)
        self.tokens.append(token)

    def handle_element_end(self, name):
        """Expat callback: queue an ELEMENT_END token."""
        self.finish_character_data()
        line, column = self.pos()
        token = XmlToken(ELEMENT_END, name, None, line, column)
        self.tokens.append(token)

    def handle_character_data(self, data):
        """Expat callback: accumulate text until the next element boundary."""
        if not self.character_data:
            # First piece of a new run: remember where the run begins.
            self.character_pos = self.pos()
        self.character_data += data

    def finish_character_data(self):
        """Queue the accumulated text, if any, as one CHARACTER_DATA token."""
        if self.character_data:
            if not self.skip_ws or not self.character_data.isspace():
                line, column = self.character_pos
                token = XmlToken(CHARACTER_DATA, self.character_data, None, line, column)
                self.tokens.append(token)
            self.character_data = ''

    def next(self):
        """Return the next token of the document.

        More input is read and parsed whenever the queue is empty.  Once the
        input is exhausted every call returns an EOF token.

        Raises xml.parsers.expat.ExpatError for any parse error other than
        the tolerated "no element found" one.
        """
        size = self._CHUNK_SIZE
        while self.index >= len(self.tokens) and not self.final:
            self.tokens = []
            self.index = 0
            data = self.fp.read(size)
            # A short read is taken to mean that the stream has ended.
            self.final = len(data) < size
            # NOTE(review): trailing NUL characters are dropped from every
            # chunk; presumably some traces are NUL padded -- confirm.
            data = data.rstrip('\0')
            try:
                self.parser.Parse(data, self.final)
            except xml.parsers.expat.ExpatError as e:
                if e.code != self._XML_ERROR_NO_ELEMENTS:
                    # Bare raise keeps the traceback of the original error.
                    raise
        if self.index >= len(self.tokens):
            line, column = self.pos()
            token = XmlToken(EOF, None, None, line, column)
        else:
            token = self.tokens[self.index]
            self.index += 1
        return token

    def pos(self):
        """Return the parser's current (line, column) position."""
        return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
131
132
class TokenMismatch(Exception):
    """Raised when the token found in the input is not the expected one.

    expected -- description of what was wanted; it is rendered with str()
    found    -- the offending token; its line and column attributes locate
                the error
    """

    def __init__(self, expected, found):
        self.expected, self.found = expected, found

    def __str__(self):
        where = (self.found.line, self.found.column)
        what = (str(self.expected), str(self.found))
        return '%u:%u: %s expected, %s found' % (where + what)
141
142
143
class XmlParser:
    """Base XML document parser.

    Keeps a single token of lookahead in self.token and offers the
    primitives needed to write a recursive-descent parser on top of an
    XmlTokenizer.
    """

    def __init__(self, fp):
        self.tokenizer = XmlTokenizer(fp)
        # Load the first token so that self.token is always valid.
        self.consume()

    def consume(self):
        """Advance the lookahead to the next token."""
        self.token = self.tokenizer.next()

    def match_element_start(self, name):
        """Tell whether the lookahead is the start of element `name`."""
        tok = self.token
        return tok.type == ELEMENT_START and tok.name_or_data == name

    def match_element_end(self, name):
        """Tell whether the lookahead is the end of element `name`."""
        tok = self.token
        return tok.type == ELEMENT_END and tok.name_or_data == name

    def _expect(self, kind, name):
        """Skip character data, then require a `kind` token called `name`.

        Returns the matching token without consuming it; raises
        TokenMismatch when the lookahead is anything else.
        """
        while self.token.type == CHARACTER_DATA:
            self.consume()
        tok = self.token
        if tok.type != kind or tok.name_or_data != name:
            raise TokenMismatch(XmlToken(kind, name), tok)
        return tok

    def element_start(self, name):
        """Consume the start tag of element `name`; return its attributes."""
        tok = self._expect(ELEMENT_START, name)
        self.consume()
        return tok.attrs

    def element_end(self, name):
        """Consume the end tag of element `name`."""
        self._expect(ELEMENT_END, name)
        self.consume()

    def character_data(self, strip = True):
        """Consume consecutive character data tokens; return their text.

        The text is stripped of surrounding whitespace unless `strip` is
        false.  An empty string is returned when the lookahead is not
        character data.
        """
        pieces = []
        while self.token.type == CHARACTER_DATA:
            pieces.append(self.token.name_or_data)
            self.consume()
        text = ''.join(pieces)
        if strip:
            return text.strip()
        return text
188
189
class TraceParser(XmlParser):
    """Recursive-descent parser for XML trace documents.

    parse() walks the <trace> element, builds one Call object per top-level
    <call> element and passes it to handle_call(), which subclasses
    override.

    The value classes used here (Call, Literal, NamedConstant, Blob, Array,
    Struct, Pointer) are not defined in this file; presumably they come
    from the `model` module -- verify against its definition.
    """

    # Element names accepted by parse_value().  Each one is handled by the
    # method named 'parse_' + element name.
    _VALUE_ELEMENTS = ('null', 'bool', 'int', 'uint', 'float', 'string', 'enum', 'array', 'struct', 'ptr', 'bytes')

    def __init__(self, fp):
        XmlParser.__init__(self, fp)
        # Number of the call parsed last; used to number calls that have no
        # 'no' attribute.
        self.last_call_no = 0

    def parse(self):
        """Parse the whole document, invoking handle_call() for each call."""
        self.element_start('trace')
        while self.token.type not in (ELEMENT_END, EOF):
            call = self.parse_call()
            self.handle_call(call)
        # Reaching the end of the input without a closing </trace> tag is
        # accepted.
        if self.token.type != EOF:
            self.element_end('trace')

    def parse_call(self):
        """Parse a <call> element and return the matching Call object.

        Raises KeyError when the 'class' or 'method' attribute is missing
        and TokenMismatch when an unexpected child element is met.
        """
        attrs = self.element_start('call')
        try:
            no = int(attrs['no'])
        except KeyError:
            # Unnumbered call: continue counting from the previous one.
            self.last_call_no += 1
            no = self.last_call_no
        else:
            self.last_call_no = no
        klass = attrs['class']
        method = attrs['method']
        args = []
        ret = None
        time = None
        while self.token.type == ELEMENT_START:
            child = self.token.name_or_data
            if child == 'arg':
                args.append(self.parse_arg())
            elif child == 'ret':
                ret = self.parse_ret()
            elif child == 'call':
                # ignore nested function calls
                self.parse_call()
            elif child == 'time':
                time = self.parse_time()
            else:
                raise TokenMismatch("<arg ...>, <ret ...>, <call ...> or <time ...>", self.token)
        self.element_end('call')

        return Call(no, klass, method, args, ret, time)

    def parse_arg(self):
        """Parse an <arg> element; return a (name, value) pair."""
        attrs = self.element_start('arg')
        name = attrs['name']
        value = self.parse_value()
        self.element_end('arg')

        return name, value

    def parse_ret(self):
        """Parse a <ret> element; return the value it wraps."""
        self.element_start('ret')
        value = self.parse_value()
        self.element_end('ret')

        return value

    def parse_time(self):
        """Parse a <time> element; return the value it wraps."""
        self.element_start('time')
        time = self.parse_value()
        self.element_end('time')
        return time

    def parse_value(self):
        """Parse whichever value element comes next.

        Raises TokenMismatch when the lookahead is not the start of one of
        the known value elements.
        """
        if self.token.type == ELEMENT_START:
            if self.token.name_or_data in self._VALUE_ELEMENTS:
                method = getattr(self, 'parse_' +  self.token.name_or_data)
                return method()
        raise TokenMismatch(" or " .join(self._VALUE_ELEMENTS), self.token)

    def _parse_leaf(self, name, convert = None):
        """Parse element `name`, which holds only text; return that text.

        When `convert` is given it is applied to the stripped text before
        the end tag is consumed, so a conversion error is raised at the
        same point as it would be with the steps written out inline.
        """
        self.element_start(name)
        value = self.character_data()
        if convert is not None:
            value = convert(value)
        self.element_end(name)
        return value

    def parse_null(self):
        self.element_start('null')
        self.element_end('null')
        return Literal(None)

    def parse_bool(self):
        # The text is converted with int(), not bool().
        return Literal(self._parse_leaf('bool', int))

    def parse_int(self):
        return Literal(self._parse_leaf('int', int))

    def parse_uint(self):
        return Literal(self._parse_leaf('uint', int))

    def parse_float(self):
        return Literal(self._parse_leaf('float', float))

    def parse_enum(self):
        return NamedConstant(self._parse_leaf('enum'))

    def parse_string(self):
        return Literal(self._parse_leaf('string'))

    def parse_bytes(self):
        return Blob(self._parse_leaf('bytes'))

    def parse_array(self):
        """Parse an <array> element made of <elem> children."""
        self.element_start('array')
        elems = []
        while self.token.type != ELEMENT_END:
            elems.append(self.parse_elem())
        self.element_end('array')
        return Array(elems)

    def parse_elem(self):
        """Parse an <elem> element; return the value it wraps."""
        self.element_start('elem')
        value = self.parse_value()
        self.element_end('elem')
        return value

    def parse_struct(self):
        """Parse a <struct> element made of <member> children."""
        attrs = self.element_start('struct')
        name = attrs['name']
        members = []
        while self.token.type != ELEMENT_END:
            members.append(self.parse_member())
        self.element_end('struct')
        return Struct(name, members)

    def parse_member(self):
        """Parse a <member> element; return a (name, value) pair."""
        attrs = self.element_start('member')
        name = attrs['name']
        value = self.parse_value()
        self.element_end('member')

        return name, value

    def parse_ptr(self):
        # The address is kept as text; no numeric conversion is done here.
        return Pointer(self._parse_leaf('ptr'))

    def handle_call(self, call):
        """Hook invoked by parse() for every top-level call; does nothing."""
        pass
351
352
class TraceDumper(TraceParser):
    """Trace parser that pretty-prints every call it parses.

    fp        -- stream holding the XML trace
    outStream -- stream the calls are written to; when omitted (or None)
                 the value sys.stdout has at construction time is used
    """

    def __init__(self, fp, outStream = None):
        TraceParser.__init__(self, fp)
        # Look sys.stdout up now rather than in the signature: a default
        # argument is evaluated once, when the class body is executed, and
        # would miss any later replacement of sys.stdout.
        if outStream is None:
            outStream = sys.stdout
        # NOTE(review): `format` and PrettyPrinter are not defined in this
        # file; presumably `from model import *` provides them -- confirm.
        self.formatter = format.DefaultFormatter(outStream)
        self.pretty_printer = PrettyPrinter(self.formatter)

    def handle_call(self, call):
        """Pretty-print `call`, followed by a newline."""
        call.visit(self.pretty_printer)
        self.formatter.newline()
363
364
class Main:
    '''Common main class for all retrace command line utilities.

    Subclasses customise the tool by overriding get_optparser() and
    process_arg().
    '''

    def __init__(self):
        pass

    def main(self):
        '''Parse the command line and process every trace file named on it.

        Each file is opened, handed to process_arg() and closed again, even
        when process_arg() raises.
        '''
        optparser = self.get_optparser()
        (options, args) = optparser.parse_args(sys.argv[1:])

        if not args:
            optparser.error('insufficient number of arguments')

        for arg in args:
            stream = self._open_trace(arg)
            try:
                self.process_arg(stream, options)
            finally:
                # Do not leak one file handle per trace.
                stream.close()

    def _open_trace(self, arg):
        '''Open the trace file `arg`, choosing the reader by file extension.'''
        if arg.endswith('.gz'):
            from gzip import GzipFile
            return GzipFile(arg, 'rt')
        if arg.endswith('.bz2'):
            from bz2 import BZ2File
            return BZ2File(arg, 'rU')
        return open(arg, 'rt')

    def get_optparser(self):
        '''Return the option parser used to interpret the command line.'''
        optparser = optparse.OptionParser(
            usage="\n\t%prog [options] TRACE  [...]")
        return optparser

    def process_arg(self, stream, options):
        '''Process one opened trace; the default dumps it with TraceDumper.'''
        parser = TraceDumper(stream)
        parser.parse()
397
398
if __name__ == '__main__':
    # Run the command line tool only when this file is executed as a script.
    app = Main()
    app.main()
401