# Copyright (C) 2009 Google Inc. All rights reserved.
# Copyright (C) 2010 Chris Jerdonek (chris.jerdonek@gmail.com)
# Copyright (C) 2010 ProFUSION embedded systems
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Supports reading and processing text files."""

import codecs
import logging
import os
import sys


_log = logging.getLogger(__name__)


class TextFileReader(object):

    """Supports reading and processing text files.

    Attributes:
      file_count: The total number of files passed to this instance
                  for processing, including non-text files and files
                  that should be skipped.
      delete_only_file_count: The number of files that this instance did
                              not actually process because they have no
                              modified lines, but that should still be
                              treated as processed.

    """

    def __init__(self, filesystem, processor):
        """Create an instance.

        Arguments:
          filesystem: The object used for path queries. This class calls
                      its exists(), isdir() and join() methods.
          processor: A ProcessorBase instance. This class calls its
                     should_process() and process() methods.

        """
        # FIXME: Although TextFileReader requires a FileSystem it circumvents it in two places!
        self.filesystem = filesystem
        self._processor = processor
        self.file_count = 0
        self.delete_only_file_count = 0

    def _read_lines(self, file_path):
        """Read the file at a path, and return its lines.

        The contents are decoded as UTF-8 (undecodable bytes are replaced)
        and split on "\\n" only, so a trailing newline yields a final
        empty string and CRLF files keep their trailing "\\r".

        Arguments:
          file_path: The path of the file to read, or "-" for stdin.

        Returns:
          A list of strings, one per line.

        Raises:
          IOError: If the file does not exist or cannot be read.

        """
        # Support the UNIX convention of using "-" for stdin.
        if file_path == '-':
            # The codecs reader needs a byte stream.  On Python 3
            # sys.stdin is a text stream whose bytes live in .buffer;
            # on Python 2 sys.stdin itself yields bytes.
            byte_stream = getattr(sys.stdin, 'buffer', sys.stdin)
            stream = codecs.StreamReaderWriter(byte_stream,
                                               codecs.getreader('utf8'),
                                               codecs.getwriter('utf8'),
                                               'replace')
        else:
            # We do not open the file with universal newline support
            # (codecs does not support it anyway), so the resulting
            # lines contain trailing "\r" characters if we are reading
            # a file with CRLF endings.
            # FIXME: This should use self.filesystem
            stream = codecs.open(file_path, 'r', 'utf8', 'replace')

        try:
            contents = stream.read()
        finally:
            stream.close()

        return contents.split('\n')

    def process_file(self, file_path, **kwargs):
        """Process the given file by calling the processor's process() method.

        Every call increments file_count, including calls for files that
        end up being skipped or that cannot be read.

        Args:
          file_path: The path of the file to process, or "-" for stdin.
          **kwargs: Any additional keyword parameters that should be passed
                    to the processor's process() method.  The process()
                    method should support these keyword arguments.

        Raises:
          SystemExit: If no file at file_path exists.

        """
        self.file_count += 1

        # "-" denotes stdin, so it is exempt from the existence check.
        if not self.filesystem.exists(file_path) and file_path != "-":
            _log.error("File does not exist: '%s'", file_path)
            sys.exit(1)  # FIXME: This should throw or return instead of exiting directly.

        if not self._processor.should_process(file_path):
            _log.debug("Skipping file: '%s'", file_path)
            return
        _log.debug("Processing file: '%s'", file_path)

        try:
            lines = self._read_lines(file_path)
        except IOError as err:
            # An unreadable file is reported and skipped rather than
            # aborting the whole run.
            _log.warning("Could not read file. Skipping: '%s'\n %s",
                         file_path, err)
            return

        self._processor.process(lines, file_path, **kwargs)

    def _process_directory(self, directory):
        """Process all files in the given directory, recursively."""
        # FIXME: We should consider moving to self.filesystem.files_under() (or adding walk() to FileSystem)
        for dir_path, dir_names, file_names in os.walk(directory):
            for file_name in file_names:
                file_path = self.filesystem.join(dir_path, file_name)
                self.process_file(file_path)

    def process_paths(self, paths):
        """Process each of the given paths.

        Directories are processed recursively; every other path is
        passed to process_file().

        Args:
          paths: An iterable of file or directory paths.

        """
        for path in paths:
            if self.filesystem.isdir(path):
                self._process_directory(directory=path)
            else:
                self.process_file(path)

    def count_delete_only_file(self):
        """Count a file that contains only deleted lines.

        Files which have no modified or newly-added lines don't need
        a style check, but should be treated as checked.  For that
        purpose, we just count up the number of such files.
        """
        self.delete_only_file_count += 1