1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3##############################################################################
4#
5# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
6# All Rights Reserved.
7#
8# This software is subject to the provisions of the Zope Public License,
9# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
10# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
11# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
12# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
13# FOR A PARTICULAR PURPOSE.
14#
15##############################################################################
16## Originally zExceptions.ExceptionFormatter from Zope;
17## Modified by Ian Bicking, Imaginary Landscape, 2005
18"""
19An exception collector that finds traceback information plus
20supplements
21"""
22
23import sys
24import traceback
25import time
26from six.moves import cStringIO as StringIO
27import linecache
28from paste.exceptions import serial_number_generator
29import warnings
30
# When true, a plain-text traceback of any error raised while building a
# frame's ``__traceback_supplement__`` is stored in that frame's data as
# ``supplement_exception``; when false such errors are silently dropped.
DEBUG_EXCEPTION_FORMATTER = True
# Prefix handed to serial_number_generator.hash_identifier when the
# collector builds an exception's identification code.
DEBUG_IDENT_PREFIX = 'E-'
# Encoding used by ExceptionCollector.safeStr when str() of an exception
# value fails with UnicodeEncodeError.
FALLBACK_ENCODING = 'UTF-8'

# Names exported by ``from <this module> import *``.
__all__ = ['collect_exception', 'ExceptionCollector']
36
class ExceptionCollector(object):

    """
    Produces a data structure that can be used by formatters to
    display exception reports.

    Magic variables:

    If you define one of these variables in your local scope, you can
    add information to tracebacks that happen in that context.  This
    allows applications to add all sorts of extra information about
    the context of the error, including URLs, environmental variables,
    users, hostnames, etc.  These are the variables we look for:

    ``__traceback_supplement__``:
        You can define this locally or globally (the local definition
        wins when both exist).

        ``__traceback_supplement__`` is a tuple of ``(factory, arg1,
        arg2...)``.  When there is an exception, ``factory(arg1, arg2,
        ...)`` is called, and the resulting object is inspected for
        supplemental information.

    ``__traceback_info__``:
        This information is added to the traceback, usually fairly
        literally.  Unlike the other variables it is only looked up
        in the local scope.

    ``__traceback_hide__``:
        If set and true, this indicates that the frame should be
        hidden from abbreviated tracebacks.  This way you can hide
        some of the complexity of the larger framework and let the
        user focus on their own errors.

        By setting it to ``'before'``, all frames before this one will
        be thrown away.  By setting it to ``'after'`` then all frames
        after this will be thrown away until ``'reset'`` is found.  In
        each case the frame where it is set is included, unless you
        append ``'_and_this'`` to the value (e.g.,
        ``'before_and_this'``).

        Note that formatters will ignore this entirely if the frame
        that contains the error wouldn't normally be shown according
        to these rules.

    ``__traceback_reporter__``:
        This should be a reporter object (see the reporter module),
        or a list/tuple of reporter objects.  All reporters found this
        way will be given the exception, innermost first.

    ``__traceback_decorator__``:
        This object (defined in a local or global scope) will get the
        result of this function (the CollectedException defined
        below).  It may modify this object in place, or return an
        entirely new object.  This gives the object the ability to
        manipulate the traceback arbitrarily.

    The actual interpretation of these values is largely up to the
    reporters and formatters.

    ``collect_exception(*sys.exc_info())`` will return an object with
    several attributes:

    ``frames``:
        A list of frames
    ``exception_formatted``:
        The result of ``traceback.format_exception_only`` for the
        exception (a list of strings)
    ``exception_type``:
        The type of the exception, like ``ValueError``
    ``exception_value``:
        The string value of the exception, like ``'x not in list'``
    ``identification_code``:
        A hash of the exception data meant to identify the general
        exception, so that it shares this code with other exceptions
        that derive from the same problem.  The code is a hash of
        all the module names and function names in the traceback,
        plus exception_type.  This should be shown to users so they
        can refer to the exception later. (@@: should it include a
        portion that allows identification of the specific instance
        of the exception as well?)
    ``date``:
        ``time.localtime()`` at the moment of collection
    ``extra_data``:
        A dictionary merging the ``extraData()`` results of every
        supplement found (each key maps to a list of values)

    The list of frames follows the traceback chain, so the innermost
    frame (where the exception was raised) comes last.  Each frame has
    these attributes; some values may be None if they could not be
    determined.

    ``modname``:
        the name of the module
    ``filename``:
        the filename of the module
    ``lineno``:
        the line of the error
    ``revision``:
        the contents of __version__ or __revision__
    ``name``:
        the function name
    ``supplement``:
        an object created from ``__traceback_supplement__``
    ``supplement_exception``:
        a simple traceback of any exception ``__traceback_supplement__``
        created
    ``traceback_info``:
        the str() of any ``__traceback_info__`` variable found in the local
        scope (@@: should it str()-ify it or not?)
    ``traceback_hide``:
        the value of any ``__traceback_hide__`` variable
    ``traceback_log``:
        the value of any ``__traceback_log__`` variable
    ``traceback_decorator``:
        the value of any ``__traceback_decorator__`` variable
    ``tbid``:
        the ``id()`` of the traceback object for this frame


    ``__traceback_supplement__`` is thrown away, but a fixed
    set of attributes are captured; each of these attributes is
    optional.

    ``object``:
        the name of the object being visited
    ``source_url``:
        the original URL requested
    ``line``:
        the line of source being executed (for interpreters, like ZPT)
    ``column``:
        the column of source being executed
    ``expression``:
        the expression being evaluated (also for interpreters)
    ``warnings``:
        a list of (string) warnings to be displayed
    ``getInfo``:
        a function/method that takes no arguments, and returns a string
        describing any extra information
    ``extraData``:
        a function/method that takes no arguments, and returns a
        dictionary.  The contents of this dictionary will not be
        displayed in the context of the traceback, but globally for
        the exception.  Results will be grouped by the keys in the
        dictionaries (which also serve as titles).  The keys can also
        be tuples of (importance, title); in this case the importance
        should be ``important`` (shows up at top), ``normal`` (shows
        up somewhere; unspecified), ``supplemental`` (shows up at
        bottom), or ``extra`` (shows up hidden or not at all).

    These are used to create an object with attributes of the same
    names, except that the *result* of ``getInfo()`` is stored as
    ``info`` and the result of ``extraData()`` is stored as ``extra``.
    ``__traceback_supplement__`` implementations should be careful to
    produce values that are relatively static and unlikely to cause
    further errors in the reporting system -- any complex
    introspection should go in ``getInfo()`` and should ultimately
    return a string.

    Note that all attributes are optional, and under certain
    circumstances may be None or may not exist at all -- the collector
    can only do a best effort, but must avoid creating any exceptions
    itself.

    Formatters may want to use ``__traceback_hide__`` as a hint to
    hide frames that are part of the 'framework' or underlying system.
    There are a variety of rules about special values for this
    variable that formatters should be aware of.

    TODO:

    More attributes in __traceback_supplement__?  Maybe an attribute
    that gives a list of local variables that should also be
    collected?  Also, attributes that would be explicitly meant for
    the entire request, not just a single frame.  Right now some of
    the fixed set of attributes (e.g., source_url) are meant for this
    use, but there's no explicit way for the supplement to indicate
    new values, e.g., logged-in user, HTTP referrer, environment, etc.
    Also, the attributes that do exist are Zope/Web oriented.

    More information on frames?  cgitb, for instance, produces
    extensive information on local variables.  There exists the
    possibility that getting this information may cause side effects,
    which can make debugging more difficult; but it also provides
    fodder for post-mortem debugging.  However, the collector is not
    meant to be configurable, but to capture everything it can and let
    the formatters be configurable.  Maybe this would have to be a
    configuration value, or maybe it could be indicated by another
    magical variable (which would probably mean 'show all local
    variables below this frame')
    """

    # When false (the default) getRevision() always returns None.
    show_revisions = 0

    def __init__(self, limit=None):
        """
        ``limit`` caps the number of traceback entries collected; None
        defers to ``sys.tracebacklimit`` (see ``getLimit``).
        """
        self.limit = limit

    def getLimit(self):
        """
        Return the frame limit: ``self.limit`` if set, otherwise
        ``sys.tracebacklimit`` if the interpreter defines it, else None
        (no limit).
        """
        limit = self.limit
        if limit is None:
            limit = getattr(sys, 'tracebacklimit', None)
        return limit

    def getRevision(self, globals):
        """
        Return the stripped ``str()`` of ``__revision__`` (or, failing
        that, ``__version__``) from the module namespace ``globals``.

        Returns None when ``show_revisions`` is false or neither name
        is defined, and ``'???'`` when the value cannot be converted
        to a string.
        """
        if not self.show_revisions:
            return None
        revision = globals.get('__revision__', None)
        if revision is None:
            # Incorrect but commonly used spelling
            revision = globals.get('__version__', None)

        if revision is not None:
            try:
                revision = str(revision).strip()
            except Exception:
                # Best effort: a broken __str__ must not break reporting.
                revision = '???'
        return revision

    def collectSupplement(self, supplement, tb):
        """
        Copy the fixed set of attributes from a supplement object into
        a ``SupplementaryData`` instance.

        Missing attributes become None.  ``getInfo()`` and
        ``extraData()`` are called when present (and truthy) and their
        results stored as ``info`` and ``extra``.  ``tb`` is accepted
        for interface compatibility and is not used.  Exceptions raised
        by the supplement propagate to the caller.
        """
        result = {}

        for name in ('object', 'source_url', 'line', 'column',
                     'expression', 'warnings'):
            result[name] = getattr(supplement, name, None)

        # Each (method name, result key) pair is called if available.
        for method_name, key in (('getInfo', 'info'),
                                 ('extraData', 'extra')):
            func = getattr(supplement, method_name, None)
            result[key] = func() if func else None
        return SupplementaryData(**result)

    def collectLine(self, tb, extra_data):
        """
        Collect the data for a single traceback entry ``tb``.

        Returns a dictionary suitable for ``ExceptionFrame(**data)``.
        Any ``extraData()`` produced by the frame's supplement is
        merged into the ``extra_data`` dictionary, which is modified in
        place (each key maps to a list of values).
        """
        frame = tb.tb_frame
        code = frame.f_code
        frame_globals = frame.f_globals
        frame_locals = frame.f_locals
        # Duck-type check for a mapping.  (Testing for ``has_key`` is
        # wrong on Python 3, where dicts have no such method: every
        # frame was flagged as invalid and its locals thrown away.)
        if not hasattr(frame_locals, 'keys'):
            # Something weird about this frame; it's not a real dict
            warnings.warn(
                "Frame %s has an invalid locals(): %r" % (
                frame_globals.get('__name__', 'unknown'), frame_locals))
            frame_locals = {}
        data = {
            'modname': frame_globals.get('__name__', None),
            'filename': code.co_filename,
            'lineno': tb.tb_lineno,
            'revision': self.getRevision(frame_globals),
            'name': code.co_name,
            'tbid': id(tb),
        }

        # Output a traceback supplement, if any.
        if '__traceback_supplement__' in frame_locals:
            # Use the supplement defined in the function.
            tbs = frame_locals['__traceback_supplement__']
        elif '__traceback_supplement__' in frame_globals:
            # Use the supplement defined in the module.
            # This is used by Scripts (Python).
            tbs = frame_globals['__traceback_supplement__']
        else:
            tbs = None
        if tbs is not None:
            try:
                # Unpacking happens inside the try so that a malformed
                # supplement (e.g. not a sequence) cannot crash the
                # collector.
                factory = tbs[0]
                args = tbs[1:]
                supp = factory(*args)
                data['supplement'] = self.collectSupplement(supp, tb)
                if data['supplement'].extra:
                    for key, value in data['supplement'].extra.items():
                        extra_data.setdefault(key, []).append(value)
            except Exception:
                if DEBUG_EXCEPTION_FORMATTER:
                    data['supplement_exception'] = traceback.format_exc()
                # else just swallow the exception.

        try:
            tbi = frame_locals.get('__traceback_info__', None)
            if tbi is not None:
                data['traceback_info'] = str(tbi)
        except Exception:
            # Deliberate best effort: an unprintable __traceback_info__
            # is simply left out of the report.
            pass

        marker = object()
        for name in ('__traceback_hide__', '__traceback_log__',
                     '__traceback_decorator__'):
            try:
                found = frame_locals.get(
                    name, frame_globals.get(name, marker))
                if found is not marker:
                    # '__traceback_hide__' -> 'traceback_hide'
                    data[name[2:-2]] = found
            except Exception:
                # Deliberate best effort, as above.
                pass

        return data

    def collectExceptionOnly(self, etype, value):
        """
        Return ``traceback.format_exception_only(etype, value)``: a
        list of strings describing the exception without its traceback.
        """
        return traceback.format_exception_only(etype, value)

    def collectException(self, etype, value, tb, limit=None):
        """
        Walk the traceback ``tb`` and return a ``CollectedException``
        (or whatever a ``__traceback_decorator__`` replaced it with).

        ``etype``, ``value`` and ``tb`` are the three items of
        ``sys.exc_info()``.  ``limit`` caps the number of traceback
        entries collected; None defers to ``getLimit()``.
        """
        # The next line provides a way to detect recursion.
        __exception_formatter__ = 1  # noqa: F841 (read via f_locals)
        frames = []
        ident_data = []
        traceback_decorators = []
        if limit is None:
            limit = self.getLimit()
        n = 0
        extra_data = {}
        while tb is not None and (limit is None or n < limit):
            if tb.tb_frame.f_locals.get('__exception_formatter__'):
                # Stop recursion. @@: should make a fake ExceptionFrame
                frames.append('(Recursive formatException() stopped)\n')
                break
            data = self.collectLine(tb, extra_data)
            frame = ExceptionFrame(**data)
            frames.append(frame)
            if frame.traceback_decorator is not None:
                traceback_decorators.append(frame.traceback_decorator)
            ident_data.append(frame.modname or '?')
            ident_data.append(frame.name or '?')
            tb = tb.tb_next
            n += 1
        ident_data.append(str(etype))
        ident = serial_number_generator.hash_identifier(
            ' '.join(ident_data), length=5, upper=True,
            prefix=DEBUG_IDENT_PREFIX)

        # Subclasses count too: on Python 3 a failed import normally
        # raises ModuleNotFoundError, which an identity test misses.
        # ``etype`` may not be a class at all, hence the TypeError guard.
        try:
            is_import_error = issubclass(etype, ImportError)
        except TypeError:
            is_import_error = False
        if is_import_error:
            extra_data[('important', 'sys.path')] = [sys.path]

        result = CollectedException(
            frames=frames,
            exception_formatted=self.collectExceptionOnly(etype, value),
            exception_type=etype,
            exception_value=self.safeStr(value),
            identification_code=ident,
            date=time.localtime(),
            extra_data=extra_data)
        for decorator in traceback_decorators:
            try:
                new_result = decorator(result)
                if new_result is not None:
                    result = new_result
            except Exception:
                # A failing decorator must not prevent the report;
                # keep the result we had before calling it.
                pass
        return result

    def safeStr(self, obj):
        """
        Return ``str(obj)``, degrading gracefully when that raises
        UnicodeEncodeError.

        On Python 2 the object is converted with ``unicode()`` and
        encoded with ``FALLBACK_ENCODING`` (errors replaced).  On
        Python 3, where no separate ``unicode`` type exists, or if that
        conversion also fails, ``repr(obj)`` is returned.
        """
        try:
            return str(obj)
        except UnicodeEncodeError:
            try:
                text_type = unicode  # noqa: F821 (Python 2 only)
            except NameError:
                # Python 3: str() already is the text conversion, so
                # retrying it would fail the same way.
                return repr(obj)
            try:
                return text_type(obj).encode(FALLBACK_ENCODING, 'replace')
            except UnicodeEncodeError:
                # This is when something is really messed up, but this can
                # happen when the __str__ of an object has to handle unicode
                return repr(obj)
390
# Module-level cap on traceback depth; it is lowered near the end of this
# module when ``sys.tracebacklimit`` is set to something smaller.
# NOTE(review): nothing visible in this file reads it (the collector uses
# its own ``limit`` argument) -- presumably kept for external callers;
# confirm before removing.
limit = 200
392
class Bunch(object):

    """
    A generic container: every keyword argument given to the
    constructor becomes an attribute of the instance.
    """

    def __init__(self, **attrs):
        for key in attrs:
            setattr(self, key, attrs[key])

    def __repr__(self):
        # Show each public instance attribute as ``key='value'``, with
        # the str() of the value cut to 30 characters; attributes whose
        # name starts with an underscore are left out.
        shown = []
        for key, val in self.__dict__.items():
            if key.startswith('_'):
                continue
            shown.append('%s=%r' % (key, str(val)[:30]))
        return '<%s %s>' % (self.__class__.__name__, ' '.join(shown))
409
class CollectedException(Bunch):
    """
    This is the result of collecting the exception; it contains copies
    of data of interest.
    """
    # A list of frames (ExceptionFrame instances), innermost last:
    frames = []
    # The result of traceback.format_exception_only; this looks
    # like the last line(s) of a normal traceback you'd see in the
    # interactive interpreter
    exception_formatted = None
    # The type of the exception.  The collector in this module stores
    # the exception class itself here (e.g. ValueError), not its name.
    # (@@: we can't keep the actual exception around, but the class
    # should be safe)
    exception_type = None
    # The string representation of the exception, from ``str(e)``.
    exception_value = None
    # An identifier which should more-or-less classify this particular
    # exception, including where in the code it happened.
    identification_code = None
    # The date, as time.localtime() returns:
    date = None
    # A dictionary of supplemental data:
    extra_data = {}

    def __init__(self, **attrs):
        # The class-level ``frames`` and ``extra_data`` defaults are
        # mutable objects shared by every instance; give each instance
        # its own containers so mutating one report (e.g.
        # ``exc.frames.append(...)``) cannot leak into another.
        attrs.setdefault('frames', [])
        attrs.setdefault('extra_data', {})
        super(CollectedException, self).__init__(**attrs)
434
class SupplementaryData(Bunch):
    """
    The result of __traceback_supplement__.  We don't keep the
    supplement object around, for fear of GC problems and whatnot.
    (@@: Maybe I'm being too superstitious about copying only specific
    information over)
    """

    # These attributes are copied from the object, or left as None
    # if the object doesn't have these attributes:
    object = None
    source_url = None
    line = None
    column = None
    expression = None
    warnings = None
    # This is the *return value* of supplement.getInfo():
    info = None
    # This is the *return value* of supplement.extraData().  The
    # collector reads ``.extra`` on every supplement, so a default is
    # provided for instances built without it:
    extra = None
453
class ExceptionFrame(Bunch):
    """
    This represents one frame of the exception.  Each frame is a
    context in the call stack, typically represented by a line
    number and module name in the traceback.
    """

    # The name of the module; can be None, especially when the code
    # isn't associated with a module.
    modname = None
    # The filename (@@: when no filename, is it None or '?'?)
    filename = None
    # Line number
    lineno = None
    # The value of __revision__ or __version__ -- but only if the
    # collector's show_revisions is true (by default it is false).
    # (@@: Why not collect this?)
    revision = None
    # The name of the function with the error (@@: None or '?' when
    # unknown?)
    name = None
    # A SupplementaryData object, if __traceback_supplement__ was found
    # (and produced no errors)
    supplement = None
    # If using __traceback_supplement__ causes any error, the
    # plain-text traceback is stored here
    supplement_exception = None
    # The str() of any __traceback_info__ value found
    traceback_info = None
    # The value of __traceback_hide__
    traceback_hide = False
    # The value of __traceback_log__; defaults to None so that reading
    # it on a frame that did not define the variable does not raise
    # AttributeError
    traceback_log = None
    # The value of __traceback_decorator__
    traceback_decorator = None
    # The id() of the traceback scope, can be used to reference the
    # scope for use elsewhere
    tbid = None

    def get_source_line(self, context=0):
        """
        Return the source of the current line of this frame.  You
        probably want to .strip() it as well, as it is likely to have
        leading whitespace.

        If context is given, then that many lines on either side will
        also be returned.  E.g., context=1 will give 3 lines.

        Returns None when the frame has no filename or line number.
        Lines are read with ``linecache.getline``, which yields an
        empty string for any line it cannot provide.
        """
        if not self.filename or not self.lineno:
            return None
        first = self.lineno - context
        last = self.lineno + context
        return ''.join(
            [linecache.getline(self.filename, number)
             for number in range(first, last + 1)])
506
# Honor an interpreter-wide ``sys.tracebacklimit`` when it is smaller than
# the module default; when the attribute is absent the fallback value
# turns ``min`` into a no-op.
limit = min(limit, getattr(sys, 'tracebacklimit', limit))

# Shared collector instance used by ``collect_exception``.
col = ExceptionCollector()
511
def collect_exception(t, v, tb, limit=None):
    """
    Collect an exception from ``sys.exc_info()``.

    ``t``, ``v`` and ``tb`` are the exception type, value and traceback
    (the three items of ``sys.exc_info()``); ``limit`` is forwarded
    unchanged.  Everything is passed to ``collectException`` on the
    module-level collector ``col`` and its result is returned.

    Use like::

      try:
          blah blah
      except:
          exc_data = collect_exception(*sys.exc_info())
    """
    return col.collectException(t, v, tb, limit=limit)
524