def lines_get(f):
  '''Read a file like object and return its non-empty lines without comments.

     Everything from the first '#' on a line to the end of that line is
     treated as a comment and dropped. Lines that are empty once the line
     terminator and the comment have been removed are omitted. Other
     whitespace is left untouched, so a line holding only spaces is still
     returned.

     @f must provide readlines() returning text lines.'''
  lines = []
  for raw in f.readlines():
    # Strip only a real line terminator. Slicing off the last character
    # unconditionally would eat a data character when the final line of the
    # file has no trailing newline, and would leave a stray '\r' behind on
    # CRLF input.
    text = raw.rstrip('\r\n')
    # partition() yields the whole string as its head when there is no '#'.
    text = text.partition('#')[0]
    if text:
      lines.append(text)
  return lines

def line_split(line):
  '''Split a line on semicolons and return the list of fields.

     Leading and trailing whitespace is removed from every field. Empty
     fields are kept, so the result always has one more element than the
     number of semicolons in @line.'''
  return [field.strip() for field in line.split(';')]

def codepoints_parse(token):
  '''Parse a Unicode style code-point or code-point range written in hex.

     "0041" yields the integer 0x41, and "0041..005A" yields the tuple
     (0x41, 0x5A). The bounds of a range are returned as written; no check
     is made that the start does not exceed the end.

     Raises ValueError if @token holds more than one ".." separator or if
     any part is not a valid hexadecimal number.'''
  parts = token.split('..')
  # str.split never returns an empty list, so only the upper bound on the
  # number of parts needs checking. Reject before attempting any conversion.
  if len(parts) > 2:
    raise ValueError(token)

  values = [int(part, 16) for part in parts]
  if len(values) == 1:
    return values[0]
  return (values[0], values[1])

def unicode_file_parse(input, map, default_value=None):
  '''Parse a file like object, @input, into a list of (start, end, value).

     Each non-comment line must consist of exactly two semicolon separated
     columns. The first column is a code-point or code-point range. The
     second column is looked up in @map to obtain the value. A single
     code-point is reported as a range whose start and end are equal.

     Lines whose mapped value equals @default_value are skipped.

     Raises ValueError if a line does not have exactly two columns or the
     code-point column cannot be parsed. A second column that is missing
     from @map surfaces as whatever the lookup on @map raises (KeyError for
     a dict).'''
  ranges = []
  for fields in (line_split(text) for text in lines_get(input)):
    if len(fields) != 2:
      raise ValueError(fields)

    codepoints = codepoints_parse(fields[0])
    value = map[fields[1]]
    if value == default_value:
      continue

    # codepoints_parse returns a tuple only for ranges. Testing for the
    # tuple rather than for an exact int type also copes with any other
    # integer type the parser might return.
    if isinstance(codepoints, tuple):
      start, end = codepoints
    else:
      start = end = codepoints

    ranges.append((start, end, value))

  return ranges

def sort_and_merge(ranges):
  '''Given a list of (start, end, value), sort it and merge neighbours.

     Two consecutive entries are merged when the second starts exactly one
     past the end of the first and both carry an equal value. Overlapping
     entries, or entries separated by a gap, are left as separate elements.

     @ranges is sorted in place as a side effect. The merged result is
     returned as a new list.'''
  ranges.sort()
  merged = []
  for item in ranges:
    if merged:
      last = merged[-1]
      if last[1] + 1 == item[0] and last[2] == item[2]:
        # Extend the most recent output range instead of starting a new one.
        merged[-1] = (last[0], item[1], item[2])
        continue
    merged.append(item)

  return merged