1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#! /usr/bin/python
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)# Copyright (C) 2009-2010, International Business Machines Corporation, Google and Others.
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)# All rights reserved.
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  Script to check and fix svn property settings for ICU source files.
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  Also check for the correct line endings on files with svn:eol-style = native
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  THIS SCRIPT DOES NOT WORK ON WINDOWS
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#     It only works correctly on platforms where the native line ending is a plain \n
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  usage:
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#     icu-svnprops-check.py  [options]
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  options:
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#     -f | --fix     Fix any problems that are found
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#     -h | --help    Print a usage line and exit.
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  The tool operates recursively on the directory from which it is run.
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  Only files from the svn repository are checked.
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  No changes are made to the repository; only the working copy will be altered.
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)import sys
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)import os
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)import os.path
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)import re
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)import getopt
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  svn autoprops definitions.
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#      Copy and paste here the ICU recommended auto-props from
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#      http://icu-project.org/docs/subversion_howto/index.html
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  This program will parse this autoprops string, and verify that files in
#  the repository have the recommended properties set.
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#
# The auto-props text parsed by this script.  Each non-comment line has the form
#     file-name-pattern = propname[=value][;propname[=value]...]
# NOTE: the *.rtf and *.pdf entries use the full "svn:mime-type" property name.
#       A bare "mime-type" is an ordinary user property that Subversion itself
#       ignores, so checking or setting it would have no effect on how the
#       files are handled.
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
###   file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?').  All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = svn:mime-type=text/rtf
*.pdf = svn:mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
# file_types:  The parsed form of the svn auto-props specification.
#              One entry per auto-props line (.cc, .cpp, .txt, etc.), appended
#              by parse_auto_props().
#              Each entry is a (pattern, proplist) pair:
#                pattern  - regular expression string that matches a file name
#                proplist - list with one (prop name, prop value) pair per
#                           property; the value is "" for properties given
#                           without an explicit value, e.g. svn:executable
file_types = []
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
def _glob_to_regex(glob):
    """Translate an auto-props file-name glob into a regular expression string.

    '*' matches any run of characters and '?' matches a single character
    (neither crosses a '/'); every other character is matched literally.
    An optional leading directory part is accepted, so the result can be
    applied with re.match() either to a bare file name or to a relative path;
    in both cases it is the final path component that must match the glob.
    """
    pieces = []
    for ch in glob:
        if ch == "*":
            pieces.append("[^/]*")
        elif ch == "?":
            pieces.append("[^/]")
        else:
            pieces.append(re.escape(ch))
    return "(?:.*/)?" + "".join(pieces) + "$"


def parse_auto_props(auto_props=None, types=None):
    """Parse an svn auto-props specification into (pattern, proplist) entries.

    auto_props: text of the auto-props section; defaults to the module-level
                svn_auto_props string.
    types:      list that receives the parsed entries; defaults to the
                module-level file_types list.
    return:     the list that was appended to.

    Each appended entry is (pattern, proplist), where pattern is a regular
    expression string for the file name and proplist is a list of
    (prop name, prop value) tuples.  Properties without an explicit value,
    e.g. svn:executable, get the value "".  Comment lines, blank lines and the
    [auto-props] header are skipped; lines that do not look like
    "<file-type> = ..." are reported on stdout and skipped.
    """
    if auto_props is None:
        auto_props = svn_auto_props
    if types is None:
        types = file_types

    for propline in auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):         # Match comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):  # Match the [auto-props] line.
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):  # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)
        pattern = _glob_to_regex(file_type.strip())

        # example string_proplist at this point: " svn:eol-style=native;svn:executable"
        # Split on ';' into a list of properties.  The negative lookahead and
        # lookbehind keep the split from matching ';;', which is an escaped ';'
        # within a property value.
        proplist = []
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            # partition() leaves the value empty for properties with no
            # explicit value, e.g. svn:executable
            prop_name, _, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # A stray or trailing ';' yields an empty item; there is no
                # property to check for it.
                continue
            # unescape any ";;" in a property value, e.g. the mime-type from
            #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        types.append((pattern, proplist))
    return types
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
def runCommand(cmd):
    """Run a shell command and return everything it wrote to standard output.

    cmd:    command line, interpreted by the shell.
    return: the command's standard output as a string.

    If the command exits with a non-zero status, a message is written to
    stderr and this script exits with the command's exit code (or 1 if the
    command was killed by a signal).
    """
    import subprocess

    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    output_text = proc.communicate()[0]
    if not isinstance(output_text, str):
        # Python 3 hands back bytes; callers work with text.
        output_text = output_text.decode("utf-8", "replace")

    # returncode is the real exit code.  The wait status returned by
    # os.popen().close() is typically a multiple of 256, which sys.exit()
    # truncates to 0, so a failure would be reported as success.
    if proc.returncode != 0:
        sys.stderr.write('" ' + cmd + ' " failed.  Exiting.\n')
        sys.exit(proc.returncode if proc.returncode > 0 else 1)
    return output_text
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
def usage():
    """Print a one-line usage summary for this script to standard output."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#  UTF-8 file check.   For text files, add a charset to the mime-type if their contents are UTF-8
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#    file_name:        name of a text file.
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#    base_mime_type:   svn:mime-type property value from the auto-props file (no charset= part)
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#    actual_mime_type: existing svn:mime-type property value for the file.
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#    return:           svn:mime-type property value, with charset added when appropriate.
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Return the svn:mime-type value a text file should have.

    file_name:        name of a text file.
    base_mime_type:   svn:mime-type value from the auto-props definitions.
    actual_mime_type: existing svn:mime-type property value for the file.
    return:           actual_mime_type unchanged if it already names a charset;
                      otherwise base_mime_type, with ";charset=utf-8" appended
                      when the file contains non-ASCII UTF-8 text and
                      base_mime_type does not already name a charset.

    The returned value is the plain property value; it is not escaped for
    any shell.  Files that are neither ASCII nor UTF-8, and UTF-8 files
    without a byte order mark, are reported on stdout.
    """
    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    # Read raw bytes so the check does not depend on any default text encoding.
    with open(file_name, "rb") as f:
        data = f.read()

    try:
        data.decode("ascii")
    except UnicodeDecodeError:
        pass
    else:
        # pure ASCII (this includes empty files).
        return base_mime_type

    try:
        data.decode("utf-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    if not data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with no BOM: " + file_name)

    # Do not add a second charset when the auto-props value already has one,
    # e.g. the *.java entry.
    if "charset=" in base_mime_type:
        return base_mime_type
    return base_mime_type + ";charset=utf-8"
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
def _shell_quote(arg):
    """Return arg quoted so that a POSIX shell treats it as a single word."""
    if arg and re.match(r"[A-Za-z0-9_@%+=:,./-]+$", arg):
        return arg
    return "'" + arg.replace("'", "'\\''") + "'"


def _has_crlf(path):
    """Return True if the file at path contains a CR LF (DOS) line ending."""
    with open(path, "rb") as f:
        return b"\r\n" in f.read()


def _strip_crlf(path):
    """Rewrite the file at path in place with every CR LF replaced by LF."""
    with open(path, "rb") as f:
        data = f.read()
    with open(path, "wb") as f:
        f.write(data.replace(b"\r\n", b"\n"))


def main(argv):
    """Check, and optionally fix, the svn properties of the working copy files.

    argv: command line arguments, without the program name.
          -f / --fix   apply the reported fixes to the working copy
          -h / --help  print a usage line and exit

    Every file listed by "svn ls -R" that exists in the working copy is
    matched by name against the parsed auto-props patterns.  For each matching
    pattern, each expected property is compared with the value reported by
    "svn propget"; mismatches are printed as the "svn propset" command that
    corrects them, and that command is run when fixing is enabled.  Files that
    should have svn:eol-style=native are also checked for DOS line endings.

    Exits with status 2 on a command line error.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # The exception text names the option that was actually rejected,
        # which is not necessarily the first argument.
        print(str(err))
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    file_list = runCommand("svn ls -R ").splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # Auto-props patterns describe file names, not paths, so match against
        # the last path component.  The path is quoted before it goes to a
        # shell so that names with spaces or metacharacters stay intact.
        base_name = os.path.basename(f)
        quoted_f = _shell_quote(f)

        for file_pattern, props in file_types:
            if not re.match(file_pattern, base_name):
                continue
            for propname, propval in props:
                actual_propval = runCommand(
                    "svn propget --strict " + _shell_quote(propname) + " " + quoted_f)
                if propname == "svn:mime-type" and propval.startswith("text/"):
                    # check for UTF-8 text files, should have
                    # svn:mime-type=text/something; charset=utf8
                    propval = check_utf8(f, propval, actual_propval)
                # A property with no explicit value (e.g. svn:executable) is
                # reported by svn as "*" when it is set.
                matches = (propval == actual_propval or
                           (propval == "" and actual_propval == "*"))
                if not matches:
                    propset_cmd = "svn propset %s '%s' %s" % (propname, propval, quoted_f)
                    print(propset_cmd)
                    if fix_problems:
                        os.system(propset_cmd)
                if propname == "svn:eol-style" and propval == "native":
                    if _has_crlf(f):
                        if fix_problems:
                            print(f + ": Removing DOS CR characters.")
                            _strip_crlf(f)
                        else:
                            print(f + " contains DOS CR characters.")
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
# Script entry point: hand the command line arguments, minus the program
# name, to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
244