1#!/usr/bin/env python
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Convert any unicode characters found in the input file to C literals."""
7
8import codecs
9import optparse
10import os
11import sys
12
13
def main(argv):
  """Validate the command line and escape one .utf8 file.

  The output file is created in the directory given with -o and is named
  after the input file's basename with its final extension removed.

  Args:
    argv: The full command line, including the program name at index 0:
        [prog, '-o', <output_dir>, <input_file>].

  Returns:
    0 on success. 1 if -o is missing, if the number of positional
    arguments is wrong, or if the input file name does not end in '.utf8'.
  """
  parser = optparse.OptionParser()
  usage = 'Usage: %prog -o <output_dir> <input_file>'
  parser.set_usage(usage)
  parser.add_option('-o', dest='output_dir')

  # argv is passed unsliced, so the program name comes back as arglist[0]
  # and the input file is arglist[1].
  options, arglist = parser.parse_args(argv)

  # Single-argument print(...) behaves the same as a Python 2 print statement
  # and as the Python 3 print function, so the script parses on both.
  if not options.output_dir:
    print("output_dir required")
    return 1

  if len(arglist) != 2:
    print("input_file required")
    return 1

  in_filename = arglist[1]

  if not in_filename.endswith('.utf8'):
    print("input_file should end in .utf8")
    return 1

  # Drop the directory part and the final extension of the input name.
  out_basename = os.path.basename(os.path.splitext(in_filename)[0])
  out_filename = os.path.join(options.output_dir, out_basename)

  WriteEscapedFile(in_filename, out_filename)
  return 0
41
42
def WriteEscapedFile(in_filename, out_filename):
  r"""Write an ASCII-only copy of a UTF-8 file using C hex escapes.

  Each byte of the UTF-8 encoding of a non-ASCII character is written as a
  lowercase \xNN escape. ASCII characters are copied through unchanged.
  When an escape is immediately followed by a hexadecimal digit, an empty
  string literal ("") is written between them so that the digit is not
  absorbed into the preceding hex escape.

  Args:
    in_filename: Path of the UTF-8 encoded file to read.
    out_filename: Path of the ASCII file to create or overwrite.

  Raises:
    UnicodeDecodeError: If the input is not valid UTF-8.
    IOError: If either file cannot be opened.
  """
  # Read inside a with-block so the input handle is closed promptly. The
  # decode also rejects invalid UTF-8 before the output file is created.
  with codecs.open(in_filename, 'r', 'utf8') as in_file:
    input_data = in_file.read()

  hex_digits = '0123456789abcdefABCDEF'
  pieces = []
  after_escape = False
  # Walk the encoded bytes rather than the decoded characters: every byte of
  # a non-ASCII character is >= 0x80, so this gives the same escapes without
  # relying on the repr() of a Python 2 str, and a surrogate pair on a narrow
  # build is encoded as one character instead of two halves.
  for byte in bytearray(input_data.encode('utf8')):
    if byte > 127:
      pieces.append('\\x%02x' % byte)
      after_escape = True
      continue
    char = chr(byte)
    # Only split the literal when a hex digit really follows an escape.
    # At end of input nothing follows, so no trailing "" is written.
    if after_escape and char in hex_digits:
      pieces.append('""')
    pieces.append(char)
    after_escape = False

  with codecs.open(out_filename, 'w', 'ascii') as out_file:
    out_file.write(''.join(pieces))
53
54
# Script entry point: run main() on the raw command line and use its return
# value as the process exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv)
  sys.exit(exit_status)
57