1#! /usr/bin/python
2
3# Copyright (C) 2016 and later: Unicode, Inc. and others.
4# License & terms of use: http://www.unicode.org/copyright.html
5
6# Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
7# All rights reserved.
8
9#
10#  Script to check and fix svn property settings for ICU source files.
11#  Also check for the correct line endings on files with svn:eol-style = native
12#
13#  THIS SCRIPT DOES NOT WORK ON WINDOWS
14#     It only works correctly on platforms where the native line ending is a plain \n
15#
16#  usage:
17#     icu-svnprops-check.py  [options]
18#
19#  options:
20#     -f | --fix     Fix any problems that are found
21#     -h | --help    Print a usage line and exit.
22#
23#  The tool operates recursively on the directory from which it is run.
24#  Only files from the svn repository are checked.
25#  No changes are made to the repository; only the working copy will be altered.
26
27import sys
28import os
29import os.path
30import re
31import getopt
32
#
#  svn autoprops definitions.
#      Copy and paste here the ICU recommended auto-props from
#      http://icu-project.org/docs/subversion_howto/index.html
#
#  This program will parse this autoprops string, and verify that files in
#  the repository have the recommended properties set.
#
#  Each entry line has the form
#      file-name-pattern = propname[=value][;propname[=value]...]
#  The string is parsed by parse_auto_props(), which skips blank lines,
#  '#' comment lines and the [auto-props] section header.
#
#  NOTE(review): the *.rtf and *.pdf entries name the property "mime-type",
#  not "svn:mime-type" as every other entry does, so the checker looks at
#  (and, with --fix, sets) a property literally called "mime-type" on those
#  files.  Confirm against the upstream recommendation before changing it.
#
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
###   file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?').  All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""
90
91
# file_types:  The parsed form of the svn auto-props specification.
#              A list of file types - .cc, .cpp, .txt, etc.
#              Each element is a (type, proplist) tuple.
#              "type" is a regular expression string that will match a file name.
#              proplist is another list, one element per property.
#              Each property item is a (prop name, prop value) tuple; a
#              property given without a value (e.g. svn:executable) has the
#              value "".
file_types = []


def parse_auto_props(auto_props=None):
    """Parse an svn auto-props specification into the global file_types list.

    auto_props:  text of the specification, one entry per line in the form
                 file-name-pattern = propname[=value][;propname[=value]...]
                 When omitted, the module-level svn_auto_props string is used.
    return:      None.  The result is stored in file_types, which is emptied
                 first so that repeated calls do not accumulate duplicates.

    Blank lines, '#' comment lines and the [auto-props] section header are
    skipped.  A line that does not look like "<pattern> =" is reported on
    stdout and ignored.
    """
    if auto_props is None:
        auto_props = svn_auto_props

    # Empty the list in place (rather than rebinding the name) so that every
    # existing reference to file_types sees the fresh contents.
    del file_types[:]

    for propline in auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):         # Match comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):  # Match the [auto-props] line.
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):  # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file name pattern from autoprops into a normal regular
        # expression, e.g.  "*.cpp"  ==>  ".*\.cpp$"
        # Everything is escaped first so that only the two wildcards, '*' and
        # '?', keep a special meaning; any other regex metacharacter in a
        # pattern matches itself.
        file_type = re.escape(file_type.strip())
        file_type = file_type.replace(r"\*", ".*").replace(r"\?", ".")
        file_type = file_type + "$"

        # Example string_proplist at this point: " svn:eol-style=native;svn:executable"
        # Split on ';' into a list of properties.  The negative lookahead and
        # lookbehind in the split regexp prevent matching on ';;', which is an
        # escaped ';' within a property value.
        proplist = []
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            # Properties with no explicit value, e.g. svn:executable, have no
            # '=' and end up with an empty value.
            prop_name, _, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # Empty entry, e.g. from a trailing ';'.  There is no property to check.
                continue
            # Unescape any ";;" in a property value, e.g. the mime-type from
            #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        file_types.append((file_type, proplist))
140
141
def runCommand(cmd):
    """Run a shell command and return everything it wrote to standard output.

    cmd:     command line; it is handed to the shell unchanged via os.popen(),
             so the caller is responsible for quoting any arguments.
    return:  the command's standard output, as a string.

    If the command fails, a message is written to stderr and the script
    exits with the command's own exit code (or 1 when the command was
    terminated without a usable exit code).
    """
    output_file = os.popen(cmd)
    try:
        output_text = output_file.read()
    finally:
        # Always reap the child, even if reading its output failed.
        exit_status = output_file.close()
    if exit_status:
        sys.stderr.write('" %s " failed.  Exiting.\n' % cmd)
        # close() returns a wait()-style status, with the child's exit code in
        # the high byte.  Passing that straight to sys.exit() would be
        # truncated to its low 8 bits, turning e.g. exit code 1 (status 256)
        # into a successful exit code 0.
        exit_code = (exit_status >> 8) & 0xFF
        sys.exit(exit_code or 1)
    return output_text
150
151
def usage():
    """Print the one-line command synopsis for this script to standard output."""
    synopsis = "".join(["usage: ", sys.argv[0], " [-f | --fix] [-h | --help]"])
    print(synopsis)
154
155
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Work out the svn:mime-type value a text file should have.

    For text files, a charset is added to the mime-type if the file's
    contents are UTF-8 (and not plain ASCII).

    file_name:        name of a text file.
    base_mime_type:   svn:mime-type property value from the auto-props
                      specification.
    actual_mime_type: existing svn:mime-type property value for the file.
    return:           svn:mime-type property value, with charset added when
                      appropriate.

    Messages are printed for files that are neither ASCII nor UTF-8, and for
    UTF-8 files that do not start with a byte order mark.
    """
    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    # Read raw bytes: the checks below are about the file's encoding, so the
    # contents must not be decoded (or newline-translated) on the way in.
    with open(file_name, 'rb') as f:
        contents = f.read()

    try:
        contents.decode("ascii")
    except UnicodeDecodeError:
        pass
    else:
        # Pure ASCII (this includes an empty file): no charset needed.
        return base_mime_type

    try:
        contents.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    # The UTF-8 byte order mark is the three bytes EF BB BF; checking only the
    # first byte would mistake other characters encoded with a leading EF for it.
    if not contents.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with no BOM: " + file_name)

    # Some auto-props entries (e.g. *.java) already specify a charset; do not
    # append a second one.
    if "charset=" in base_mime_type:
        return base_mime_type

    # Append charset=utf-8.
    return base_mime_type + ';charset=utf-8'
190
191
# A word made up only of these characters needs no quoting for the shell.
_SHELL_SAFE_WORD = re.compile(r"[A-Za-z0-9_@%+=:,./-]+\Z")


def _shell_quote(word, always=False):
    """Return word in a form the shell passes on as one literal argument.

    Words containing only safe characters are returned unchanged unless
    always is true; everything else is wrapped in single quotes.
    """
    if not always and _SHELL_SAFE_WORD.match(word):
        return word
    # Inside single quotes nothing is special except the quote itself, which
    # is emitted by closing the quotes, adding "'" and reopening them.
    return "'" + word.replace("'", "'\"'\"'") + "'"


def main(argv):
    """Check, and optionally fix, the svn properties of the files below the
    current directory.

    argv:  command line arguments, without the program name.
             -f | --fix     fix any problems that are found
             -h | --help    print a usage line and exit

    Every file listed by "svn ls -R" whose name matches an auto-props pattern
    is checked against the properties of that pattern.  Each mismatch is
    reported as the "svn propset" command that corrects it; with --fix the
    command is also run.  Files with svn:eol-style=native are additionally
    checked for CR characters, which --fix removes.

    Exits with status 2 on a command line error.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report what getopt actually objected to; argv[0] is not necessarily
        # the offending option.
        print(str(err))
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    output = runCommand("svn ls -R ")

    for f in output.splitlines():
        if os.path.isdir(f):
            # print "Skipping dir " + f
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # Auto-props patterns apply to the file name, not to the path leading
        # to it; matching the whole path would make patterns without a
        # leading wildcard ("Makefile", "configure") match at the top level only.
        base_name = os.path.basename(f)
        # The path is interpolated into shell command lines below.
        quoted_f = _shell_quote(f)

        for file_pattern, props in file_types:
            if not re.match(file_pattern, base_name):
                continue
            for propname, propval in props:
                actual_propval = runCommand(
                    "svn propget --strict " + propname + " " + quoted_f)
                if propname == "svn:mime-type" and propval.startswith("text/"):
                    # Check for UTF-8 text files; they should have
                    # svn:mime-type=text/something;charset=utf-8
                    propval = check_utf8(f, propval, actual_propval)
                # svn reports a value-less property such as svn:executable as "*".
                if not (propval == actual_propval or
                        (propval == "" and actual_propval == "*")):
                    propset_cmd = "svn propset %s %s %s" % (
                        propname, _shell_quote(propval, always=True), quoted_f)
                    print(propset_cmd)
                    if fix_problems:
                        os.system(propset_cmd)
                if propname == "svn:eol-style" and propval == "native":
                    # "\r" below is a literal CR character handed to grep/sed.
                    # grep -q exits with 0 as soon as any line contains one.
                    if os.system("grep -q '\r' " + quoted_f) == 0:
                        if fix_problems:
                            print(f + ": Removing DOS CR characters.")
                            os.system("sed -i 's/\r//' " + quoted_f)
                        else:
                            print(f + " contains DOS CR characters.")


if __name__ == "__main__":
    main(sys.argv[1:])
247