#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Generate header file for unicode data."""

import io
import optparse
import sys


# Skeleton of the generated C++ header. The @@@EMOJI_DATA@@@ placeholder is
# replaced with the formatted code point list by _generate_header_contents().
UNICODE_EMOJI_TEMPLATE = """
/* file generated by frameworks/minikin/lib/minikin/Android.mk */
#ifndef MINIKIN_UNICODE_EMOJI_H
#define MINIKIN_UNICODE_EMOJI_H

#include <stdint.h>

namespace android {
namespace generated {

int32_t EMOJI_LIST[] = {
@@@EMOJI_DATA@@@
};

}  // namespace generated
}  // namespace android

#endif  // MINIKIN_UNICODE_EMOJI_H
"""


def _create_opt_parser():
  """Build the command-line option parser for this script.

  Returns:
    An optparse.OptionParser that stores the value of -i/--input in
    ``opts.input`` and the value of -o/--output in ``opts.output``. Neither
    option has a default, so each attribute is None when its flag is omitted.
  """
  parser = optparse.OptionParser()
  parser.add_option('-i', '--input', type='string', action='store',
                    dest='input', help='path to input emoji-data.txt')
  parser.add_option('-o', '--output', type='string', action='store',
                    dest='output', help='path to output UnicodeEmoji.h')
  return parser


def _read_emoji_data(emoji_data_file_path):
  """Collect every code point tagged with the ``Emoji`` property.

  Each data line is parsed as ``<code points> ; <property>``, where
  <code points> is either one hexadecimal value or an inclusive
  ``<start>..<end>`` hexadecimal range. Everything from the first ``#`` to the
  end of a line is treated as a comment, and blank lines are ignored.

  Args:
    emoji_data_file_path: path to the emoji data text file to parse.

  Returns:
    A list of integer code points in file order, with ranges expanded.

  Raises:
    ValueError: if a data line does not have exactly two ';'-separated fields
      or a code point is not valid hexadecimal.
  """
  result = []
  # Decode explicitly as UTF-8 so that parsing does not depend on the locale of
  # the build machine; io.open accepts `encoding` on both Python 2 and 3.
  with io.open(emoji_data_file_path, encoding='utf-8') as emoji_data_file:
    for raw_line in emoji_data_file:
      line = raw_line.partition('#')[0].strip()  # Drop comments.
      if not line:
        continue  # Skip empty line.

      code_points, prop = line.split(';')
      code_points = code_points.strip()
      prop = prop.strip()
      if prop != 'Emoji':
        # Skip (rather than stop at) other properties so that Emoji entries
        # are collected regardless of how the sections are ordered.
        continue

      if '..' in code_points:  # code point range
        cp_start, cp_end = code_points.split('..')
        # range() exists on both Python 2 and 3, unlike xrange().
        result.extend(range(int(cp_start, 16), int(cp_end, 16) + 1))
      else:
        result.append(int(code_points, 16))
  return result


def _generate_header_contents(emoji_list):
  """Render the header text for the given emoji code points.

  Code points are formatted as ``0x%04X`` literals, joined with ', ' and
  wrapped so that each row (including its four-space indent) stays under
  100 characters before the trailing comma. The rows replace the
  @@@EMOJI_DATA@@@ placeholder in UNICODE_EMOJI_TEMPLATE.

  Args:
    emoji_list: iterable of integer code points, emitted in the given order.

  Returns:
    The complete header file contents as a string.

  Raises:
    ValueError: if emoji_list is empty, since there would be nothing to put
      in the generated array.
  """
  INDENT = ' ' * 4
  JOINER = ', '
  MAX_WIDTH = 100

  hex_list = ['0x%04X' % x for x in emoji_list]
  if not hex_list:
    raise ValueError('emoji_list is empty; no EMOJI_LIST entries to generate')

  lines = []
  tmp_line = INDENT + hex_list[0]
  for hex_str in hex_list[1:]:
    if len(tmp_line) + len(JOINER) + len(hex_str) >= MAX_WIDTH:
      # Row is full: close it with a trailing comma and start a new one.
      lines.append(tmp_line + ',')
      tmp_line = INDENT + hex_str
    else:
      tmp_line = tmp_line + JOINER + hex_str
  lines.append(tmp_line)  # Last row carries no trailing comma.

  return UNICODE_EMOJI_TEMPLATE.replace('@@@EMOJI_DATA@@@', '\n'.join(lines))


# Script entry point: parse emoji data from --input and write the generated
# header to --output.
if __name__ == '__main__':
  opt_parser = _create_opt_parser()
  opts, _ = opt_parser.parse_args()
  # Neither option has a default; report a usage error instead of passing
  # None on to open().
  if not opts.input or not opts.output:
    opt_parser.error('both --input and --output must be specified')

  emoji_list = _read_emoji_data(opts.input)
  header = _generate_header_contents(emoji_list)
  with open(opts.output, 'w') as header_file:
    header_file.write(header)