1"""Coverage data for Coverage."""
2
3import os
4
5from coverage.backward import pickle, sorted        # pylint: disable=W0622
6from coverage.files import PathAliases
7
8
class CoverageData(object):
    """Manages collected coverage data, including file storage.

    The data file format is a pickled dict, with these keys:

        * collector: a string identifying the collecting software

        * lines: a dict mapping filenames to sorted lists of line numbers
          executed:
            { 'file1': [17,23,45],  'file2': [1,2,3], ... }

        * arcs: a dict mapping filenames to sorted lists of line number pairs:
            { 'file1': [(17,23), (17,25), (25,26)], ... }

    """

    def __init__(self, basename=None, collector=None):
        """Create a CoverageData.

        `basename` is the name of the file to use for storing data.

        `collector` is a string describing the coverage measurement software.

        """
        self.collector = collector or 'unknown'

        # Whether `read`, `write` and `erase` actually touch the disk file.
        # Toggled with `usefile`.
        self.use_file = True

        # Construct the filename that will be used for data file storage, if we
        # ever do any file storage.
        self.filename = basename or ".coverage"
        self.filename = os.path.abspath(self.filename)

        # A map from canonical Python source file name to a dictionary in
        # which there's an entry for each line number that has been
        # executed:
        #
        #   {
        #       'filename1.py': { 12: None, 47: None, ... },
        #       ...
        #       }
        #
        # The values are always None; the dicts are used as sets for fast
        # membership and de-duplication.
        self.lines = {}

        # A map from canonical Python source file name to a dictionary with an
        # entry for each pair of line numbers forming an arc:
        #
        #   {
        #       'filename1.py': { (12,14): None, (47,48): None, ... },
        #       ...
        #       }
        #
        # As with `self.lines`, the inner dicts are used as sets.
        self.arcs = {}

        # NOTE(review): caching these on the instance looks intentional --
        # presumably so the methods keep working during interpreter shutdown,
        # when module globals can be torn down.  Confirm before removing.
        self.os = os
        self.sorted = sorted
        self.pickle = pickle

    def usefile(self, use_file=True):
        """Set whether or not to use a disk file for data."""
        self.use_file = use_file

    def read(self):
        """Read coverage data from the coverage data file (if it exists).

        A missing or unreadable file results in empty data (see
        `_read_file`); no exception is raised.

        """
        if self.use_file:
            self.lines, self.arcs = self._read_file(self.filename)
        else:
            self.lines, self.arcs = {}, {}

    def write(self, suffix=None):
        """Write the collected coverage data to a file.

        `suffix` is a suffix to append to the base file name. This can be used
        for multiple or parallel execution, so that many coverage data files
        can exist simultaneously.  A dot will be used to join the base name and
        the suffix.

        """
        if self.use_file:
            filename = self.filename
            if suffix:
                filename += "." + suffix
            self.write_file(filename)

    def erase(self):
        """Erase the data, both in this object, and from its file storage."""
        if self.use_file:
            if self.filename and os.path.exists(self.filename):
                os.remove(self.filename)
        self.lines = {}
        self.arcs = {}

    def line_data(self):
        """Return the map from filenames to lists of line numbers executed."""
        # The stored dicts-as-sets are converted to sorted lists, matching
        # the documented on-disk format.
        return dict(
            [(f, self.sorted(lmap.keys())) for f, lmap in self.lines.items()]
            )

    def arc_data(self):
        """Return the map from filenames to lists of line number pairs."""
        return dict(
            [(f, self.sorted(amap.keys())) for f, amap in self.arcs.items()]
            )

    def write_file(self, filename):
        """Write the coverage data to `filename` as a pickled dict.

        Only the keys described in the class docstring are written; 'arcs'
        and 'collector' are omitted when empty.

        """

        # Create the file data.
        data = {}

        data['lines'] = self.line_data()
        arcs = self.arc_data()
        if arcs:
            data['arcs'] = arcs

        if self.collector:
            data['collector'] = self.collector

        # Write the pickle to the file.  Protocol 2 is used, the highest
        # protocol readable by all Python 2.x versions.
        fdata = open(filename, 'wb')
        try:
            self.pickle.dump(data, fdata, 2)
        finally:
            fdata.close()

    def read_file(self, filename):
        """Read the coverage data from `filename`, replacing current data."""
        self.lines, self.arcs = self._read_file(filename)

    def raw_data(self, filename):
        """Return the raw pickled data from `filename`.

        NOTE(review): this unpickles the file contents directly; unpickling
        is unsafe on untrusted files.  Data files are assumed to be
        locally produced.

        """
        fdata = open(filename, 'rb')
        try:
            data = pickle.load(fdata)
        finally:
            fdata.close()
        return data

    def _read_file(self, filename):
        """Return the stored coverage data from the given file.

        Returns two values, suitable for assigning to `self.lines` and
        `self.arcs`.

        A missing, unreadable, or corrupt file yields two empty dicts.

        """
        lines = {}
        arcs = {}
        try:
            data = self.raw_data(filename)
            if isinstance(data, dict):
                # Unpack the 'lines' item: lists of line numbers become
                # dicts-as-sets keyed by line number.
                lines = dict([
                    (f, dict.fromkeys(linenos, None))
                        for f, linenos in data.get('lines', {}).items()
                    ])
                # Unpack the 'arcs' item: lists of pairs become dicts-as-sets
                # keyed by (from_line, to_line) tuples.
                arcs = dict([
                    (f, dict.fromkeys(arcpairs, None))
                        for f, arcpairs in data.get('arcs', {}).items()
                    ])
        except Exception:
            # Deliberate best-effort: any failure reading or unpickling the
            # file is treated as "no data".
            pass
        return lines, arcs

    def combine_parallel_data(self, aliases=None):
        """Combine a number of data files together.

        Treat `self.filename` as a file prefix, and combine the data from all
        of the data files starting with that prefix plus a dot.

        If `aliases` is provided, it's a `PathAliases` object that is used to
        re-map paths to match the local machine's.

        """
        aliases = aliases or PathAliases()
        data_dir, local = os.path.split(self.filename)
        localdot = local + '.'
        for f in os.listdir(data_dir or '.'):
            if f.startswith(localdot):
                full_path = os.path.join(data_dir, f)
                new_lines, new_arcs = self._read_file(full_path)
                # Merge into our data, re-mapping source paths as we go.
                for filename, file_data in new_lines.items():
                    filename = aliases.map(filename)
                    self.lines.setdefault(filename, {}).update(file_data)
                for filename, file_data in new_arcs.items():
                    filename = aliases.map(filename)
                    self.arcs.setdefault(filename, {}).update(file_data)
                # Delete the combined data file, but never the base file
                # itself.
                if f != local:
                    os.remove(full_path)

    def add_line_data(self, line_data):
        """Add executed line data.

        `line_data` is { filename: { lineno: None, ... }, ...}

        """
        for filename, linenos in line_data.items():
            self.lines.setdefault(filename, {}).update(linenos)

    def add_arc_data(self, arc_data):
        """Add measured arc data.

        `arc_data` is { filename: { (l1,l2): None, ... }, ...}

        """
        for filename, arcs in arc_data.items():
            self.arcs.setdefault(filename, {}).update(arcs)

    def touch_file(self, filename):
        """Ensure that `filename` appears in the data, empty if needed."""
        self.lines.setdefault(filename, {})

    def measured_files(self):
        """A list of all files that had been measured."""
        return list(self.lines.keys())

    def executed_lines(self, filename):
        """A map containing all the line numbers executed in `filename`.

        If `filename` hasn't been collected at all (because it wasn't executed)
        then return an empty map.

        """
        return self.lines.get(filename) or {}

    def executed_arcs(self, filename):
        """A map containing all the arcs executed in `filename`.

        Returns an empty map if `filename` has no arc data.

        """
        return self.arcs.get(filename) or {}

    def add_to_hash(self, filename, hasher):
        """Contribute `filename`'s data to the Md5Hash `hasher`."""
        hasher.update(self.executed_lines(filename))
        hasher.update(self.executed_arcs(filename))

    def summary(self, fullpath=False):
        """Return a dict summarizing the coverage data.

        Keys are based on the filenames, and values are the number of executed
        lines.  If `fullpath` is true, then the keys are the full pathnames of
        the files, otherwise they are the basenames of the files.

        """
        summ = {}
        if fullpath:
            filename_fn = lambda f: f
        else:
            filename_fn = self.os.path.basename
        for filename, lines in self.lines.items():
            summ[filename_fn(filename)] = len(lines)
        return summ

    def has_arcs(self):
        """Does this data have arcs?"""
        return bool(self.arcs)
263
264
if __name__ == '__main__':
    # Ad-hoc command-line utility: pretty-print the raw pickled contents of
    # a coverage data file.  Uses the file named on the command line if
    # given, otherwise the default data file.
    import pprint
    import sys

    _covdata = CoverageData()
    _argv = sys.argv[1:]
    if _argv:
        _fname = _argv[0]
    else:
        _fname = _covdata.filename
    pprint.pprint(_covdata.raw_data(_fname))
274