#!/usr/bin/env python3

"""\
List python source files.

There are three functions to check whether a file is a Python source, listed
here with increasing complexity:

- has_python_ext() checks whether a file name ends in '.py[w]'.
- looks_like_python() checks whether the file is not binary and either has
  the '.py[w]' extension or the first line contains the word 'python'.
- can_be_compiled() checks whether the file can be compiled by compile().

The file also must be of appropriate size - not bigger than a megabyte.

walk_python_files() recursively lists all Python files under the given
directories.
"""
__author__ = "Oleg Broytmann, Georg Brandl"

__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]


import os
import re

# Control bytes that should not appear in text: C0 controls other than
# \t, \n, \v, \f and \r, plus DEL.
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')

# Files larger than this many bytes are never opened (see _open()).
_MAX_FILE_SIZE = 1024 * 1024

# File name suffixes accepted by has_python_ext().
_PYTHON_EXTENSIONS = (".py", ".pyw")

debug = False


def print_debug(msg):
    """Print *msg* when the module-level ``debug`` flag is true."""
    if debug:
        print(msg)


def _open(fullpath):
    """Open *fullpath* for binary reading.

    Return the open file object, or None when the file cannot be stat()ed,
    is bigger than one megabyte, or cannot be opened.  Failures are reported
    through print_debug() only; no exception is propagated for OSError.
    """
    try:
        size = os.stat(fullpath).st_size
    except OSError as err:  # Permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None

    if size > _MAX_FILE_SIZE:  # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None

    try:
        return open(fullpath, "rb")
    except OSError as err:  # Access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, err))
        return None


def has_python_ext(fullpath):
    """Return True if the name *fullpath* ends in '.py' or '.pyw'."""
    return fullpath.endswith(_PYTHON_EXTENSIONS)


def looks_like_python(fullpath):
    """Return True if *fullpath* is a readable text file that looks like Python.

    Only the first line is inspected.  If it contains a control byte matched
    by ``binary_re`` the file is treated as binary and rejected.  Otherwise
    the file is accepted when its name has a Python extension or when the
    first line contains the word 'python' (e.g. a shebang line).
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    with infile:
        line = infile.readline()

    if binary_re.search(line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    if has_python_ext(fullpath):
        return True

    # disguised Python script (e.g. CGI)
    return b"python" in line


def can_be_compiled(fullpath):
    """Return True if the contents of *fullpath* can be compiled by compile().

    The file is read as bytes and compiled in "exec" mode; nothing is
    executed.  Any exception raised by compile() is reported through
    print_debug() and yields False.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    with infile:
        code = infile.read()

    try:
        compile(code, fullpath, "exec")
    except Exception as err:
        # Deliberately broad: this is a best-effort predicate and any
        # failure to compile simply means "not usable Python source".
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False

    return True


def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.

    paths: a list of files and/or directories to be checked.
    is_python: a function that takes a file name and checks whether it is a
               Python source file
    exclude_dirs: a list of directory base names that should be excluded in
                  the search
    """
    excluded = frozenset(exclude_dirs) if exclude_dirs is not None else frozenset()

    for path in paths:
        print_debug("testing: %s" % path)
        if os.path.isfile(path):
            if is_python(path):
                yield path
        elif os.path.isdir(path):
            print_debug("    it is a directory")
            for dirpath, dirnames, filenames in os.walk(path):
                # Prune in place so that os.walk() does not descend into
                # the excluded directories.
                dirnames[:] = [name for name in dirnames if name not in excluded]
                for filename in filenames:
                    fullpath = os.path.join(dirpath, filename)
                    print_debug("testing: %s" % fullpath)
                    if is_python(fullpath):
                        yield fullpath
        else:
            print_debug("    unknown type")


if __name__ == "__main__":
    # Two simple examples/tests
    for fullpath in walk_python_files(['.']):
        print(fullpath)
    print("----------")
    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
        print(fullpath)