1/* C-based Tracer for Coverage. */
2
3#include "Python.h"
4#include "compile.h"        /* in 2.3, this wasn't part of Python.h */
5#include "eval.h"           /* or this. */
6#include "structmember.h"
7#include "frameobject.h"
8
/* Compile-time debugging switches.
 *
 * All three are off by default.  Each one is tested with `#if`, so to turn
 * one on, replace its #undef with a #define that has a nonzero value, for
 * example `#define TRACE_LOG 1`.
 *
 *   WHAT_LOG       log the `what` parameter of every trace function call.
 *   TRACE_LOG      log the tracer's own bookkeeping.
 *   COLLECT_STATS  keep event counters, which get_stats() returns.
 */
#undef WHAT_LOG
#undef TRACE_LOG
#undef COLLECT_STATS

/* STATS(x) expands to x in a COLLECT_STATS build, and to nothing otherwise. */
#if !COLLECT_STATS
#define STATS(x)
#else
#define STATS(x)        x
#endif
19
/* Py 2.x and 3.x compatibility
 *
 * The My* names below hide the differences between the text, integer and
 * type-header APIs of the two major Python versions.
 */

#ifndef Py_TYPE
#define Py_TYPE(o)    (((PyObject*)(o))->ob_type)
#endif

#if PY_MAJOR_VERSION >= 3

#define MyText_Type         PyUnicode_Type
#define MyText_Check(o)     PyUnicode_Check(o)
#if PY_VERSION_HEX >= 0x03030000
/* PyUnicode_AsUTF8 returns a buffer owned by the string object itself, so
   nothing has to be released by the caller.  It returns NULL (with an
   exception set) if the text cannot be encoded. */
#define MyText_AS_STRING(o) PyUnicode_AsUTF8(o)
#else
/* Before 3.3 there is no public equivalent.  This form creates a temporary
   bytes object that is never released, and yields a bad pointer for
   non-ASCII text, so only use it in debugging code. */
#define MyText_AS_STRING(o) PyBytes_AS_STRING(PyUnicode_AsASCIIString(o))
#endif
#define MyInt_FromLong(l)   PyLong_FromLong(l)

#define MyType_HEAD_INIT    PyVarObject_HEAD_INIT(NULL, 0)

#else

#define MyText_Type         PyString_Type
#define MyText_Check(o)     PyString_Check(o)
#define MyText_AS_STRING(o) PyString_AS_STRING(o)
#define MyInt_FromLong(l)   PyInt_FromLong(l)

#define MyType_HEAD_INIT    PyObject_HEAD_INIT(NULL)  0,

#endif /* Py3k */
45
/* The values returned to indicate ok or error.  The negative value is
   parenthesized so it stays a single operand wherever the macro is used. */
#define RET_OK      0
#define RET_ERROR   (-1)
49
50/* An entry on the data stack.  For each call frame, we need to record the
51    dictionary to capture data, and the last line number executed in that
52    frame.
53*/
54typedef struct {
55    PyObject * file_data;  /* PyMem_Malloc'ed, a borrowed ref. */
56    int last_line;
57} DataStackEntry;
58
59/* The CTracer type. */
60
61typedef struct {
62    PyObject_HEAD
63
64    /* Python objects manipulated directly by the Collector class. */
65    PyObject * should_trace;
66    PyObject * warn;
67    PyObject * data;
68    PyObject * should_trace_cache;
69    PyObject * arcs;
70
71    /* Has the tracer been started? */
72    int started;
73    /* Are we tracing arcs, or just lines? */
74    int tracing_arcs;
75
76    /*
77        The data stack is a stack of dictionaries.  Each dictionary collects
78        data for a single source file.  The data stack parallels the call stack:
79        each call pushes the new frame's file data onto the data stack, and each
80        return pops file data off.
81
82        The file data is a dictionary whose form depends on the tracing options.
83        If tracing arcs, the keys are line number pairs.  If not tracing arcs,
84        the keys are line numbers.  In both cases, the value is irrelevant
85        (None).
86    */
87    /* The index of the last-used entry in data_stack. */
88    int depth;
89    /* The file data at each level, or NULL if not recording. */
90    DataStackEntry * data_stack;
91    int data_stack_alloc;       /* number of entries allocated at data_stack. */
92
93    /* The current file_data dictionary.  Borrowed. */
94    PyObject * cur_file_data;
95
96    /* The line number of the last line recorded, for tracing arcs.
97        -1 means there was no previous line, as when entering a code object.
98    */
99    int last_line;
100
101    /* The parent frame for the last exception event, to fix missing returns. */
102    PyFrameObject * last_exc_back;
103    int last_exc_firstlineno;
104
105#if COLLECT_STATS
106    struct {
107        unsigned calls;
108        unsigned lines;
109        unsigned returns;
110        unsigned exceptions;
111        unsigned others;
112        unsigned new_files;
113        unsigned missed_returns;
114        unsigned stack_reallocs;
115        unsigned errors;
116    } stats;
117#endif /* COLLECT_STATS */
118} CTracer;
119
120#define STACK_DELTA    100
121
122static int
123CTracer_init(CTracer *self, PyObject *args_unused, PyObject *kwds_unused)
124{
125#if COLLECT_STATS
126    self->stats.calls = 0;
127    self->stats.lines = 0;
128    self->stats.returns = 0;
129    self->stats.exceptions = 0;
130    self->stats.others = 0;
131    self->stats.new_files = 0;
132    self->stats.missed_returns = 0;
133    self->stats.stack_reallocs = 0;
134    self->stats.errors = 0;
135#endif /* COLLECT_STATS */
136
137    self->should_trace = NULL;
138    self->warn = NULL;
139    self->data = NULL;
140    self->should_trace_cache = NULL;
141    self->arcs = NULL;
142
143    self->started = 0;
144    self->tracing_arcs = 0;
145
146    self->depth = -1;
147    self->data_stack = PyMem_Malloc(STACK_DELTA*sizeof(DataStackEntry));
148    if (self->data_stack == NULL) {
149        STATS( self->stats.errors++; )
150        PyErr_NoMemory();
151        return RET_ERROR;
152    }
153    self->data_stack_alloc = STACK_DELTA;
154
155    self->cur_file_data = NULL;
156    self->last_line = -1;
157
158    self->last_exc_back = NULL;
159
160    return RET_OK;
161}
162
163static void
164CTracer_dealloc(CTracer *self)
165{
166    if (self->started) {
167        PyEval_SetTrace(NULL, NULL);
168    }
169
170    Py_XDECREF(self->should_trace);
171    Py_XDECREF(self->warn);
172    Py_XDECREF(self->data);
173    Py_XDECREF(self->should_trace_cache);
174
175    PyMem_Free(self->data_stack);
176
177    Py_TYPE(self)->tp_free((PyObject*)self);
178}
179
180#if TRACE_LOG
/* Return a string of 2*n spaces, for indenting log lines by stack depth.

   The result points into a fixed block of spaces, so it must not be freed or
   modified.  A negative n gives the empty string, and an n too large for the
   block gives the whole block, so the result always lies inside the block.
*/
static const char *
indent(int n)
{
    static const char * spaces =
        "                                                                    "
        "                                                                    "
        "                                                                    "
        "                                                                    "
        ;
    size_t avail = strlen(spaces);
    size_t want;

    if (n <= 0) {
        /* The depth is -1 when the data stack is empty. */
        return spaces + avail;
    }
    want = (size_t)n * 2;
    if (want > avail) {
        want = avail;
    }
    return spaces + avail - want;
}
192
193static int logging = 0;
194/* Set these constants to be a file substring and line number to start logging. */
195static const char * start_file = "tests/views";
196static int start_line = 27;
197
198static void
199showlog(int depth, int lineno, PyObject * filename, const char * msg)
200{
201    if (logging) {
202        printf("%s%3d ", indent(depth), depth);
203        if (lineno) {
204            printf("%4d", lineno);
205        }
206        else {
207            printf("    ");
208        }
209        if (filename) {
210            printf(" %s", MyText_AS_STRING(filename));
211        }
212        if (msg) {
213            printf(" %s", msg);
214        }
215        printf("\n");
216    }
217}
218
219#define SHOWLOG(a,b,c,d)    showlog(a,b,c,d)
220#else
221#define SHOWLOG(a,b,c,d)
222#endif /* TRACE_LOG */
223
#if WHAT_LOG
/* Labels for the WHAT_LOG output, indexed by the trace function's `what`
   value for call, exception, line and return events, in that order. */
static const char * what_sym[] = {
    "CALL",
    "EXC ",
    "LINE",
    "RET "
};
#endif
227
228/* Record a pair of integers in self->cur_file_data. */
229static int
230CTracer_record_pair(CTracer *self, int l1, int l2)
231{
232    int ret = RET_OK;
233
234    PyObject * t = PyTuple_New(2);
235    if (t != NULL) {
236        PyTuple_SET_ITEM(t, 0, MyInt_FromLong(l1));
237        PyTuple_SET_ITEM(t, 1, MyInt_FromLong(l2));
238        if (PyDict_SetItem(self->cur_file_data, t, Py_None) < 0) {
239            STATS( self->stats.errors++; )
240            ret = RET_ERROR;
241        }
242        Py_DECREF(t);
243    }
244    else {
245        STATS( self->stats.errors++; )
246        ret = RET_ERROR;
247    }
248    return ret;
249}
250
251/*
252 * The Trace Function
253 */
254static int
255CTracer_trace(CTracer *self, PyFrameObject *frame, int what, PyObject *arg_unused)
256{
257    int ret = RET_OK;
258    PyObject * filename = NULL;
259    PyObject * tracename = NULL;
260
261    #if WHAT_LOG
262    if (what <= sizeof(what_sym)/sizeof(const char *)) {
263        printf("trace: %s @ %s %d\n", what_sym[what], MyText_AS_STRING(frame->f_code->co_filename), frame->f_lineno);
264    }
265    #endif
266
267    #if TRACE_LOG
268    if (strstr(MyText_AS_STRING(frame->f_code->co_filename), start_file) && frame->f_lineno == start_line) {
269        logging = 1;
270    }
271    #endif
272
273    /* See below for details on missing-return detection. */
274    if (self->last_exc_back) {
275        if (frame == self->last_exc_back) {
276            /* Looks like someone forgot to send a return event. We'll clear
277               the exception state and do the RETURN code here.  Notice that the
278               frame we have in hand here is not the correct frame for the RETURN,
279               that frame is gone.  Our handling for RETURN doesn't need the
280               actual frame, but we do log it, so that will look a little off if
281               you're looking at the detailed log.
282
283               If someday we need to examine the frame when doing RETURN, then
284               we'll need to keep more of the missed frame's state.
285            */
286            STATS( self->stats.missed_returns++; )
287            if (self->depth >= 0) {
288                if (self->tracing_arcs && self->cur_file_data) {
289                    if (CTracer_record_pair(self, self->last_line, -self->last_exc_firstlineno) < 0) {
290                        return RET_ERROR;
291                    }
292                }
293                SHOWLOG(self->depth, frame->f_lineno, frame->f_code->co_filename, "missedreturn");
294                self->cur_file_data = self->data_stack[self->depth].file_data;
295                self->last_line = self->data_stack[self->depth].last_line;
296                self->depth--;
297            }
298        }
299        self->last_exc_back = NULL;
300    }
301
302
303    switch (what) {
304    case PyTrace_CALL:      /* 0 */
305        STATS( self->stats.calls++; )
306        /* Grow the stack. */
307        self->depth++;
308        if (self->depth >= self->data_stack_alloc) {
309            STATS( self->stats.stack_reallocs++; )
310            /* We've outgrown our data_stack array: make it bigger. */
311            int bigger = self->data_stack_alloc + STACK_DELTA;
312            DataStackEntry * bigger_data_stack = PyMem_Realloc(self->data_stack, bigger * sizeof(DataStackEntry));
313            if (bigger_data_stack == NULL) {
314                STATS( self->stats.errors++; )
315                PyErr_NoMemory();
316                self->depth--;
317                return RET_ERROR;
318            }
319            self->data_stack = bigger_data_stack;
320            self->data_stack_alloc = bigger;
321        }
322
323        /* Push the current state on the stack. */
324        self->data_stack[self->depth].file_data = self->cur_file_data;
325        self->data_stack[self->depth].last_line = self->last_line;
326
327        /* Check if we should trace this line. */
328        filename = frame->f_code->co_filename;
329        tracename = PyDict_GetItem(self->should_trace_cache, filename);
330        if (tracename == NULL) {
331            STATS( self->stats.new_files++; )
332            /* We've never considered this file before. */
333            /* Ask should_trace about it. */
334            PyObject * args = Py_BuildValue("(OO)", filename, frame);
335            tracename = PyObject_Call(self->should_trace, args, NULL);
336            Py_DECREF(args);
337            if (tracename == NULL) {
338                /* An error occurred inside should_trace. */
339                STATS( self->stats.errors++; )
340                return RET_ERROR;
341            }
342            if (PyDict_SetItem(self->should_trace_cache, filename, tracename) < 0) {
343                STATS( self->stats.errors++; )
344                return RET_ERROR;
345            }
346        }
347        else {
348            Py_INCREF(tracename);
349        }
350
351        /* If tracename is a string, then we're supposed to trace. */
352        if (MyText_Check(tracename)) {
353            PyObject * file_data = PyDict_GetItem(self->data, tracename);
354            if (file_data == NULL) {
355                file_data = PyDict_New();
356                if (file_data == NULL) {
357                    STATS( self->stats.errors++; )
358                    return RET_ERROR;
359                }
360                ret = PyDict_SetItem(self->data, tracename, file_data);
361                Py_DECREF(file_data);
362                if (ret < 0) {
363                    STATS( self->stats.errors++; )
364                    return RET_ERROR;
365                }
366            }
367            self->cur_file_data = file_data;
368            /* Make the frame right in case settrace(gettrace()) happens. */
369            Py_INCREF(self);
370            frame->f_trace = (PyObject*)self;
371            SHOWLOG(self->depth, frame->f_lineno, filename, "traced");
372        }
373        else {
374            self->cur_file_data = NULL;
375            SHOWLOG(self->depth, frame->f_lineno, filename, "skipped");
376        }
377
378        Py_DECREF(tracename);
379
380        self->last_line = -1;
381        break;
382
383    case PyTrace_RETURN:    /* 3 */
384        STATS( self->stats.returns++; )
385        /* A near-copy of this code is above in the missing-return handler. */
386        if (self->depth >= 0) {
387            if (self->tracing_arcs && self->cur_file_data) {
388                int first = frame->f_code->co_firstlineno;
389                if (CTracer_record_pair(self, self->last_line, -first) < 0) {
390                    return RET_ERROR;
391                }
392            }
393
394            SHOWLOG(self->depth, frame->f_lineno, frame->f_code->co_filename, "return");
395            self->cur_file_data = self->data_stack[self->depth].file_data;
396            self->last_line = self->data_stack[self->depth].last_line;
397            self->depth--;
398        }
399        break;
400
401    case PyTrace_LINE:      /* 2 */
402        STATS( self->stats.lines++; )
403        if (self->depth >= 0) {
404            SHOWLOG(self->depth, frame->f_lineno, frame->f_code->co_filename, "line");
405            if (self->cur_file_data) {
406                /* We're tracing in this frame: record something. */
407                if (self->tracing_arcs) {
408                    /* Tracing arcs: key is (last_line,this_line). */
409                    if (CTracer_record_pair(self, self->last_line, frame->f_lineno) < 0) {
410                        return RET_ERROR;
411                    }
412                }
413                else {
414                    /* Tracing lines: key is simply this_line. */
415                    PyObject * this_line = MyInt_FromLong(frame->f_lineno);
416                    if (this_line == NULL) {
417                        STATS( self->stats.errors++; )
418                        return RET_ERROR;
419                    }
420                    ret = PyDict_SetItem(self->cur_file_data, this_line, Py_None);
421                    Py_DECREF(this_line);
422                    if (ret < 0) {
423                        STATS( self->stats.errors++; )
424                        return RET_ERROR;
425                    }
426                }
427            }
428            self->last_line = frame->f_lineno;
429        }
430        break;
431
432    case PyTrace_EXCEPTION:
433        /* Some code (Python 2.3, and pyexpat anywhere) fires an exception event
434           without a return event.  To detect that, we'll keep a copy of the
435           parent frame for an exception event.  If the next event is in that
436           frame, then we must have returned without a return event.  We can
437           synthesize the missing event then.
438
439           Python itself fixed this problem in 2.4.  Pyexpat still has the bug.
440           I've reported the problem with pyexpat as http://bugs.python.org/issue6359 .
441           If it gets fixed, this code should still work properly.  Maybe some day
442           the bug will be fixed everywhere coverage.py is supported, and we can
443           remove this missing-return detection.
444
445           More about this fix: http://nedbatchelder.com/blog/200907/a_nasty_little_bug.html
446        */
447        STATS( self->stats.exceptions++; )
448        self->last_exc_back = frame->f_back;
449        self->last_exc_firstlineno = frame->f_code->co_firstlineno;
450        break;
451
452    default:
453        STATS( self->stats.others++; )
454        break;
455    }
456
457    return RET_OK;
458}
459
460/*
461 * Python has two ways to set the trace function: sys.settrace(fn), which
462 * takes a Python callable, and PyEval_SetTrace(func, obj), which takes
463 * a C function and a Python object.  The way these work together is that
464 * sys.settrace(pyfn) calls PyEval_SetTrace(builtin_func, pyfn), using the
465 * Python callable as the object in PyEval_SetTrace.  So sys.gettrace()
466 * simply returns the Python object used as the second argument to
467 * PyEval_SetTrace.  So sys.gettrace() will return our self parameter, which
468 * means it must be callable to be used in sys.settrace().
469 *
470 * So we make our self callable, equivalent to invoking our trace function.
471 *
472 * To help with the process of replaying stored frames, this function has an
473 * optional keyword argument:
474 *
475 *      def CTracer_call(frame, event, arg, lineno=0)
476 *
477 * If provided, the lineno argument is used as the line number, and the
478 * frame's f_lineno member is ignored.
479 */
480static PyObject *
481CTracer_call(CTracer *self, PyObject *args, PyObject *kwds)
482{
483    PyFrameObject *frame;
484    PyObject *what_str;
485    PyObject *arg;
486    int lineno = 0;
487    int what;
488    int orig_lineno;
489    PyObject *ret = NULL;
490
491    static char *what_names[] = {
492        "call", "exception", "line", "return",
493        "c_call", "c_exception", "c_return",
494        NULL
495        };
496
497    #if WHAT_LOG
498    printf("pytrace\n");
499    #endif
500
501    static char *kwlist[] = {"frame", "event", "arg", "lineno", NULL};
502
503    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O!O|i:Tracer_call", kwlist,
504            &PyFrame_Type, &frame, &MyText_Type, &what_str, &arg, &lineno)) {
505        goto done;
506    }
507
508    /* In Python, the what argument is a string, we need to find an int
509       for the C function. */
510    for (what = 0; what_names[what]; what++) {
511        if (!strcmp(MyText_AS_STRING(what_str), what_names[what])) {
512            break;
513        }
514    }
515
516    /* Save off the frame's lineno, and use the forced one, if provided. */
517    orig_lineno = frame->f_lineno;
518    if (lineno > 0) {
519        frame->f_lineno = lineno;
520    }
521
522    /* Invoke the C function, and return ourselves. */
523    if (CTracer_trace(self, frame, what, arg) == RET_OK) {
524        Py_INCREF(self);
525        ret = (PyObject *)self;
526    }
527
528    /* Clean up. */
529    frame->f_lineno = orig_lineno;
530
531done:
532    return ret;
533}
534
535static PyObject *
536CTracer_start(CTracer *self, PyObject *args_unused)
537{
538    PyEval_SetTrace((Py_tracefunc)CTracer_trace, (PyObject*)self);
539    self->started = 1;
540    self->tracing_arcs = self->arcs && PyObject_IsTrue(self->arcs);
541    self->last_line = -1;
542
543    /* start() returns a trace function usable with sys.settrace() */
544    Py_INCREF(self);
545    return (PyObject *)self;
546}
547
548static PyObject *
549CTracer_stop(CTracer *self, PyObject *args_unused)
550{
551    if (self->started) {
552        PyEval_SetTrace(NULL, NULL);
553        self->started = 0;
554    }
555
556    return Py_BuildValue("");
557}
558
559static PyObject *
560CTracer_get_stats(CTracer *self)
561{
562#if COLLECT_STATS
563    return Py_BuildValue(
564        "{sI,sI,sI,sI,sI,sI,sI,sI,si,sI}",
565        "calls", self->stats.calls,
566        "lines", self->stats.lines,
567        "returns", self->stats.returns,
568        "exceptions", self->stats.exceptions,
569        "others", self->stats.others,
570        "new_files", self->stats.new_files,
571        "missed_returns", self->stats.missed_returns,
572        "stack_reallocs", self->stats.stack_reallocs,
573        "stack_alloc", self->data_stack_alloc,
574        "errors", self->stats.errors
575        );
576#else
577    return Py_BuildValue("");
578#endif /* COLLECT_STATS */
579}
580
581static PyMemberDef
582CTracer_members[] = {
583    { "should_trace",       T_OBJECT, offsetof(CTracer, should_trace), 0,
584            PyDoc_STR("Function indicating whether to trace a file.") },
585
586    { "warn",               T_OBJECT, offsetof(CTracer, warn), 0,
587            PyDoc_STR("Function for issuing warnings.") },
588
589    { "data",               T_OBJECT, offsetof(CTracer, data), 0,
590            PyDoc_STR("The raw dictionary of trace data.") },
591
592    { "should_trace_cache", T_OBJECT, offsetof(CTracer, should_trace_cache), 0,
593            PyDoc_STR("Dictionary caching should_trace results.") },
594
595    { "arcs",               T_OBJECT, offsetof(CTracer, arcs), 0,
596            PyDoc_STR("Should we trace arcs, or just lines?") },
597
598    { NULL }
599};
600
601static PyMethodDef
602CTracer_methods[] = {
603    { "start",      (PyCFunction) CTracer_start,        METH_VARARGS,
604            PyDoc_STR("Start the tracer") },
605
606    { "stop",       (PyCFunction) CTracer_stop,         METH_VARARGS,
607            PyDoc_STR("Stop the tracer") },
608
609    { "get_stats",  (PyCFunction) CTracer_get_stats,    METH_VARARGS,
610            PyDoc_STR("Get statistics about the tracing") },
611
612    { NULL }
613};
614
615static PyTypeObject
616CTracerType = {
617    MyType_HEAD_INIT
618    "coverage.CTracer",        /*tp_name*/
619    sizeof(CTracer),           /*tp_basicsize*/
620    0,                         /*tp_itemsize*/
621    (destructor)CTracer_dealloc, /*tp_dealloc*/
622    0,                         /*tp_print*/
623    0,                         /*tp_getattr*/
624    0,                         /*tp_setattr*/
625    0,                         /*tp_compare*/
626    0,                         /*tp_repr*/
627    0,                         /*tp_as_number*/
628    0,                         /*tp_as_sequence*/
629    0,                         /*tp_as_mapping*/
630    0,                         /*tp_hash */
631    (ternaryfunc)CTracer_call, /*tp_call*/
632    0,                         /*tp_str*/
633    0,                         /*tp_getattro*/
634    0,                         /*tp_setattro*/
635    0,                         /*tp_as_buffer*/
636    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
637    "CTracer objects",         /* tp_doc */
638    0,                         /* tp_traverse */
639    0,                         /* tp_clear */
640    0,                         /* tp_richcompare */
641    0,                         /* tp_weaklistoffset */
642    0,                         /* tp_iter */
643    0,                         /* tp_iternext */
644    CTracer_methods,           /* tp_methods */
645    CTracer_members,           /* tp_members */
646    0,                         /* tp_getset */
647    0,                         /* tp_base */
648    0,                         /* tp_dict */
649    0,                         /* tp_descr_get */
650    0,                         /* tp_descr_set */
651    0,                         /* tp_dictoffset */
652    (initproc)CTracer_init,    /* tp_init */
653    0,                         /* tp_alloc */
654    0,                         /* tp_new */
655};
656
657/* Module definition */
658
659#define MODULE_DOC PyDoc_STR("Fast coverage tracer.")
660
661#if PY_MAJOR_VERSION >= 3
662
663static PyModuleDef
664moduledef = {
665    PyModuleDef_HEAD_INIT,
666    "coverage.tracer",
667    MODULE_DOC,
668    -1,
669    NULL,       /* methods */
670    NULL,
671    NULL,       /* traverse */
672    NULL,       /* clear */
673    NULL
674};
675
676
677PyObject *
678PyInit_tracer(void)
679{
680    PyObject * mod = PyModule_Create(&moduledef);
681    if (mod == NULL) {
682        return NULL;
683    }
684
685    CTracerType.tp_new = PyType_GenericNew;
686    if (PyType_Ready(&CTracerType) < 0) {
687        Py_DECREF(mod);
688        return NULL;
689    }
690
691    Py_INCREF(&CTracerType);
692    PyModule_AddObject(mod, "CTracer", (PyObject *)&CTracerType);
693
694    return mod;
695}
696
697#else
698
699void
700inittracer(void)
701{
702    PyObject * mod;
703
704    mod = Py_InitModule3("coverage.tracer", NULL, MODULE_DOC);
705    if (mod == NULL) {
706        return;
707    }
708
709    CTracerType.tp_new = PyType_GenericNew;
710    if (PyType_Ready(&CTracerType) < 0) {
711        return;
712    }
713
714    Py_INCREF(&CTracerType);
715    PyModule_AddObject(mod, "CTracer", (PyObject *)&CTracerType);
716}
717
718#endif /* Py3k */
719
720