1# Copyright (C) 2009 Google Inc. All rights reserved.
2# Copyright (C) 2010 Chris Jerdonek (chris.jerdonek@gmail.com)
3# Copyright (C) 2010 ProFUSION embedded systems
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#     * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Supports reading and processing text files."""
32
33import codecs
34import logging
35import os
36import sys
37
38
39_log = logging.getLogger(__name__)
40
41
class TextFileReader(object):

    """Supports reading and processing text files.

       Attributes:
         filesystem: The file system object used for existence checks,
                     directory checks and joining paths.
         file_count: The total number of files passed to this instance
                     for processing, including non-text files and files
                     that should be skipped.
         delete_only_file_count: The total number of files that this
                                 instance did not actually process because
                                 they have no modified lines, but that
                                 should still be treated as processed.
                                 Incremented by count_delete_only_file().

    """

    def __init__(self, filesystem, processor):
        """Create an instance.

        Arguments:
          filesystem: An object providing exists(path), isdir(path) and
                      join(*components).
          processor: A ProcessorBase instance. Its should_process(file_path)
                     and process(lines, file_path, **kwargs) methods are
                     called by this class.

        """
        # FIXME: Although TextFileReader requires a FileSystem it circumvents it in two places!
        self.filesystem = filesystem
        self._processor = processor
        self.file_count = 0
        self.delete_only_file_count = 0

    def _read_lines(self, file_path):
        """Read the file at a path, and return its lines.

        The contents are decoded as UTF-8 (undecodable bytes are replaced)
        and split on "\\n" only.

        Arguments:
          file_path: The path of the file to read, or "-" to read stdin.

        Returns:
          A list of strings, one per line, without the trailing "\\n".

        Raises:
          IOError: If the file does not exist or cannot be read.

        """
        # Support the UNIX convention of using "-" for stdin.
        if file_path == '-':
            # The codecs reader decodes bytes.  On Python 3 sys.stdin is a
            # text stream whose underlying byte stream is sys.stdin.buffer;
            # on Python 2 sys.stdin already yields bytes and has no "buffer"
            # attribute, so fall back to sys.stdin itself.
            byte_stream = getattr(sys.stdin, 'buffer', sys.stdin)
            stream = codecs.StreamReaderWriter(byte_stream,
                                               codecs.getreader('utf8'),
                                               codecs.getwriter('utf8'),
                                               'replace')
        else:
            # We do not open the file with universal newline support
            # (codecs does not support it anyway), so the resulting
            # lines contain trailing "\r" characters if we are reading
            # a file with CRLF endings.
            # FIXME: This should use self.filesystem
            stream = codecs.open(file_path, 'r', 'utf8', 'replace')

        try:
            contents = stream.read()
        finally:
            stream.close()

        return contents.split('\n')

    def process_file(self, file_path, **kwargs):
        """Process the given file by calling the processor's process() method.

        The file is counted in file_count even if it ends up being skipped
        or turns out to be unreadable.

        Args:
          file_path: The path of the file to process, or "-" for stdin.
          **kwargs: Any additional keyword parameters that should be passed
                    to the processor's process() method.  The process()
                    method should support these keyword arguments.

        Raises:
          SystemExit: If no file at file_path exists.

        """
        self.file_count += 1

        # "-" denotes stdin, so it is exempt from the existence check.
        if not self.filesystem.exists(file_path) and file_path != "-":
            _log.error("File does not exist: '%s'" % file_path)
            sys.exit(1)  # FIXME: This should throw or return instead of exiting directly.

        if not self._processor.should_process(file_path):
            _log.debug("Skipping file: '%s'" % file_path)
            return
        _log.debug("Processing file: '%s'" % file_path)

        try:
            lines = self._read_lines(file_path)
        except IOError as err:
            # An unreadable file is reported and skipped rather than
            # aborting the whole run.
            message = ("Could not read file. Skipping: '%s'\n  %s" % (file_path, err))
            _log.warning(message)
            return

        self._processor.process(lines, file_path, **kwargs)

    def _process_directory(self, directory):
        """Process all files in the given directory, recursively."""
        # FIXME: We should consider moving to self.filesystem.files_under() (or adding walk() to FileSystem)
        for dir_path, _dir_names, file_names in os.walk(directory):
            for file_name in file_names:
                file_path = self.filesystem.join(dir_path, file_name)
                self.process_file(file_path)

    def process_paths(self, paths):
        """Process each of the given paths.

        Directories are processed recursively; every other path is handed
        to process_file() directly.

        Args:
          paths: An iterable of file and/or directory paths.

        """
        for path in paths:
            if self.filesystem.isdir(path):
                self._process_directory(directory=path)
            else:
                self.process_file(path)

    def count_delete_only_file(self):
        """Count a file that contains only deleted lines.

        Files that have no modified or newly-added lines do not need a
        style check, but should be treated as checked. For that purpose,
        we just count the number of such files.
        """
        self.delete_only_file_count += 1
155