filelist.py revision f638486cf09816668c662241c160dad863582067
1adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward"""distutils.filelist
2adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
3adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg WardProvides the FileList class, used for poking about the filesystem
4adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Wardand building lists of files.
5adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward"""
6adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
7adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward__revision__ = "$Id$"
8adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
9e2f35c3588e6107e0166446c57da1da48399a005Tarek Ziadéimport os, re
10adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Wardimport fnmatch
11adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Wardfrom distutils.util import convert_path
127b3d56c85cacf45cf27f87e13a95fe12d76984d1Greg Wardfrom distutils.errors import DistutilsTemplateError, DistutilsInternalError
134f2f1335a8f889ac071b01becf49b49032beb163Jeremy Hyltonfrom distutils import log
14adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until set_allfiles() or findall() has
        been called (include_pattern() calls findall() on demand)
    """

    def __init__(self, warn=None, debug_print=None):
        # ignore argument to FileList, but keep them for backwards
        # compatibility
        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate filenames."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Set 'allfiles' to the result of the module-level findall()
        run on 'dir'.
        """
        self.allfiles = findall(dir)

    def debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.debug import DEBUG
        if DEBUG:
            # Single-argument call form: prints the same text whether
            # 'print' is a statement or a function.
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add the single filename 'item' to 'files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every filename in 'items' to 'files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'files' in place by (directory, basename).

        Not a strict lexical sort: each name is split with os.path.split(),
        the resulting pairs are sorted, and the names are re-joined.
        """
        # sorted() accepts any iterable, so this does not depend on map()
        # returning a list.
        split_names = sorted(map(os.path.split, self.files))
        self.files = [os.path.join(*parts) for parts in split_names]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Drop adjacent equal entries from 'files' in place.

        Assumes the list has been sorted, so that duplicates are adjacent.
        """
        # Walk backwards so deleting an entry never shifts an index that
        # is still to be visited.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split a template line into (action, patterns, dir, dir_pattern).

        'action' is the first word of the line.  Depending on the action
        either 'patterns', ('dir' and 'patterns') or 'dir_pattern' is
        filled in; the others are None.  Raises DistutilsTemplateError
        for an unknown action or a wrong number of words.
        """
        words = line.split()
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                      "'%s' expects <pattern1> <pattern2> ..." % action)

            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                      "'%s' expects <dir> <pattern1> <pattern2> ..." % action)

            dir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                     "'%s' expects a single <dir_pattern>" % action)

            dir_pattern = convert_path(words[1])

        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    def process_template_line(self, line):
        """Parse one template line and apply its action to 'files'.

        A warning is logged for every pattern that matches nothing.
        Errors from _parse_template_line() propagate to the caller.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        action, patterns, dir, dir_pattern = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    log.warn("warning: no files found matching '%s'",
                             pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    log.warn(("warning: no previously-included files "
                              "found matching '%s'"), pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    log.warn(("warning: no files found matching '%s' "
                              "anywhere in distribution"), pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found anywhere in distribution"),
                             pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    log.warn(("warning: no files found matching '%s' "
                              "under directory '%s'"),
                             pattern, dir)

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found under directory '%s'"),
                             pattern, dir)

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                log.warn("warning: no directories found matching '%s'",
                         dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                log.warn(("no previously-included directories found "
                          "matching '%s'"), dir_pattern)
        else:
            raise DistutilsInternalError(
                  "this cannot happen: invalid action '%s'" % action)

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.

        Patterns are not quite the same as implemented by the 'fnmatch'
        module: the translation is done by translate_pattern(), in which
        '*' and '?' do not match path-separator characters.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.

        Return 1 if files are found.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = 1

        return files_found

    def exclude_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.

        Other parameters are the same as for 'include_pattern()', above.
        The list 'self.files' is modified in place. Return 1 if files are
        found.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate from the end so deletions do not disturb the indices
        # that remain to be checked.
        for i in range(len(self.files) - 1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = 1

        return files_found
250adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
251adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
252adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward# ----------------------------------------------------------------------
253adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward# Utility functions
254adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
def findall(dir=os.curdir):
    """Find all files under 'dir' and return the list of full filenames
    (relative to 'dir').

    Directories are walked iteratively using an explicit stack.  Only
    entries that os.stat() reports as regular files are returned; entries
    reported as directories are descended into.  Entries that cannot be
    stat'ed (for example a dangling symbolic link, or a file removed while
    the scan is running) are skipped instead of aborting the whole scan.
    """
    from stat import ST_MODE, S_ISREG, S_ISDIR

    found = []
    pending = [dir]

    while pending:
        current = pending.pop()

        for name in os.listdir(current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join(current, name)
            else:
                fullname = name

            # Avoid excess stat calls -- just one will do, thank you!
            try:
                mode = os.stat(fullname)[ST_MODE]
            except OSError:
                # Nothing usable at this name; it cannot be listed.
                continue

            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                # os.stat() follows symbolic links, so the mode seen here
                # is never that of a link; no separate S_ISLNK test is
                # needed.
                pending.append(fullname)

    return found
285adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
286adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression.

    Return a string containing the regex.  Differs from
    'fnmatch.translate()' in that '*' and '?' do not match the platform's
    path separator (os.sep).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the path separator.
    sep = os.sep
    if sep == '\\':
        # The separator ends up inside a replacement template that itself
        # produces a regex, so the backslash has to be escaped twice: the
        # template turns four backslashes into two, which the final regex
        # reads as one literal backslash inside the character class.
        sep = r'\\\\'
    not_sep = r'\1[^%s]' % sep

    # A dot counts as non-escaped when it is preceded by an even number
    # (possibly zero) of backslashes; those backslashes are captured in
    # group 1 and kept.
    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', not_sep, pattern_re)

    return pattern_re
306adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
307adc11720645a82c8115c8686b5bfdbc23cd78bb0Greg Ward
def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.

    Return the compiled regex.  If 'is_regex' true,
    then 'pattern' is directly compiled to a regex (if it's a string)
    or just returned as-is (assumes it's a regex object).

    Otherwise 'pattern' is translated with glob_to_re().  If 'prefix' is
    given (itself a glob), the result matches names that start with the
    prefix, followed by a path separator, followed by anything, followed
    by 'pattern'; 'anchor' is ignored in that case.  Without a prefix,
    a true 'anchor' ties the pattern to the start of the name.  A false
    'pattern' (None or '') contributes nothing to the regex.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        else:
            return pattern

    # glob_to_re() may decorate the translated text with leading and/or
    # trailing regex boilerplate (end-of-string anchor, flag group, ...),
    # and the exact form depends on the fnmatch implementation in use.
    # Probe with a plain letter to learn what surrounds the body so the
    # pieces below can be spliced inside it rather than glued onto it.
    wrap_start, _, wrap_end = glob_to_re('a').partition('a')

    def unwrap(regex):
        # Return the body of a glob_to_re() result without the decoration.
        return regex[len(wrap_start):len(regex) - len(wrap_end)]

    if prefix is not None:
        prefix_re = unwrap(glob_to_re(prefix))
        # The separator is inserted into regex text, so a backslash
        # separator must be written as an escaped backslash.
        if os.sep == '\\':
            sep_re = r'\\'
        else:
            sep_re = os.sep
        # Same joining rule os.path.join() applies on POSIX: no separator
        # after an empty prefix or one that already ends with it.
        if prefix_re and not prefix_re.endswith(sep_re):
            prefix_re += sep_re
        head = "^" + prefix_re + ".*"
    elif anchor:                        # no prefix -- respect anchor flag
        head = "^"
    else:
        head = ""

    if pattern:
        body = unwrap(glob_to_re(pattern))
        return re.compile(wrap_start + head + body + wrap_end)

    # No pattern: there is no translated body to terminate, so the head
    # alone is the whole regex.
    return re.compile(head)
337