def lines_get(f):
  '''Parse a file like object, removing comments and returning a list of
     lines.

     Everything from the first '#' on a line to its end is treated as a
     comment and discarded. Lines that are empty once the comment and the
     line terminator have been removed are dropped. Other whitespace is left
     untouched, so a line consisting only of spaces is still returned.

     @f must provide readlines(). Returns a list of strings without line
     terminators.'''
  result = []
  for raw in f.readlines():
    # Strip only the line terminator. Blindly chopping the last character
    # would eat real data from a final line that has no trailing newline,
    # and would leave a stray '\r' behind on CRLF input.
    text = raw.rstrip('\r\n')
    # partition() yields the whole string as the head when there is no '#'.
    text = text.partition('#')[0]
    if text:
      result.append(text)
  return result
11
def line_split(line):
  '''Break a line into its semicolon separated fields, trimming leading and
     trailing whitespace from every field. Returns a list of strings; a line
     without any semicolon yields a single-element list.'''
  fields = []
  for field in line.split(';'):
    fields.append(field.strip())
  return fields
17
def codepoints_parse(token):
  '''Convert a Unicode style code-point token into numbers. A token of the
     form "XXXX" gives a single integer, while "XXXX..YYYY" gives a
     (start, end) tuple. All values are read as hexadecimal.

     Raises ValueError for a token with more than one ".." separator, or
     when a piece is not valid hexadecimal.'''
  pieces = token.split('..')
  # Reject malformed tokens before attempting any numeric conversion.
  if len(pieces) > 2:
    raise ValueError(token)

  bounds = tuple(int(piece, 16) for piece in pieces)
  if len(bounds) == 2:
    return bounds
  return bounds[0]
30
def unicode_file_parse(input, map, default_value = None):
  '''Parse a file like object, @input where the first column is a code-point
     range and the second column is mapped via the given dict, @map.

     Each non-comment line must hold exactly two semicolon separated fields.
     The first is a single code-point or a "start..end" range; the second
     is looked up in @map to obtain the value for that range.

     Entries whose mapped value equals @default_value are skipped.

     Returns a list of (start, end, value) tuples in file order; a single
     code-point is reported with start == end.

     Raises ValueError when a line does not have exactly two fields or the
     code-point field cannot be parsed. A second field missing from @map
     propagates whatever the @map lookup raises (KeyError for a dict).'''
  ranges = []
  for raw_line in lines_get(input):
    fields = line_split(raw_line)
    if len(fields) != 2:
      raise ValueError(fields)

    codepoints = codepoints_parse(fields[0])
    value = map[fields[1]]
    if value == default_value:
      continue

    # codepoints_parse returns a bare integer for a single code-point;
    # widen it to a one-element range so every entry has the same shape.
    if isinstance(codepoints, int):
      codepoints = (codepoints, codepoints)

    ranges.append((codepoints[0], codepoints[1], value))

  return ranges
51
def sort_and_merge(ranges):
  '''Sort a list of (start, end, value) entries and collapse runs of
     entries that are directly adjacent (one ends exactly where the next
     begins, minus one) and carry the same value.

     Note that @ranges is sorted in place. Returns a new list holding the
     merged entries.'''
  ranges.sort()
  merged = []
  for entry in ranges:
    if merged:
      tail = merged[-1]
      if tail[1] + 1 == entry[0] and tail[2] == entry[2]:
        # Extend the most recent entry rather than starting a new one.
        merged[-1] = (tail[0], entry[1], entry[2])
        continue
    merged.append(entry)

  return merged
71