#!/usr/bin/env python
# Copyright (c) 2010 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
305ddde30071f639962dd557c453f2ad01f8f0fd00Kristian Monsenimport csv
31f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrickimport os.path
32f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrickimport string
33f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrickimport sys
34f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
# Column positions within each CSV row of the input file: the entity name
# comes first, its "U+XXXX" code point second.
ENTITY, VALUE = 0, 1
37f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
def convert_entity_to_cpp_name(entity):
    """Return the C++ identifier stem for the entity name ``entity``.

    A trailing ";" cannot appear in an identifier, so it is spelled out as
    "Semicolon". "EntityName" is appended in either case, e.g.
    "amp;" -> "ampSemicolonEntityName" and "amp" -> "ampEntityName".

    ``entity`` must be non-empty; an empty string raises IndexError.
    """
    ends_with_semicolon = entity[-1] == ";"
    if ends_with_semicolon:
        stem = entity[:-1] + "Semicolon"
    else:
        stem = entity
    return stem + "EntityName"
43f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
44f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
def convert_entity_to_uchar_array(entity):
    """Return ``entity`` as a C++ brace initializer of character literals.

    Each character becomes one single-quoted literal, e.g.
    "lt;" -> "{'l', 't', ';'}".
    """
    separator = "', '"
    return "{'" + separator.join(entity) + "'}"
47f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
48f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
def convert_value_to_int(value):
    """Turn a "U+XXXX" code point string into a C++ hex literal "0xXXXX".

    The two leading characters are checked with assertions, so a value that
    does not start with "U+" raises AssertionError (unless assertions are
    disabled with -O).
    """
    assert value[0] == "U"
    assert value[1] == "+"
    hex_digits = value[2:]
    return "".join(("0x", hex_digits))
53f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
54f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
def offset_table_entry(offset):
    """Return one indented C++ row pointing at staticEntityTable[offset]."""
    return "    &staticEntityTable[{0}],".format(offset)
57f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
58f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick
# Script body.
#
# Usage: <program> -o OUTPUT_FILE INPUT_FILE
#
# Reads INPUT_FILE as CSV (column ENTITY holds the entity name, column VALUE
# its "U+XXXX" code point), sorts the rows by entity name and writes a C++
# source file to OUTPUT_FILE containing:
#   * one UChar array per entity name,
#   * staticEntityTable, one row per entity in sorted order,
#   * uppercaseOffset / lowercaseOffset, which map a first letter to the
#     first table row whose entity starts with that letter,
#   * the HTMLEntityTable accessor functions built on those tables.
program_name = os.path.basename(__file__)
if len(sys.argv) < 4 or sys.argv[1] != "-o":
    # Python 3, change to: print("Usage: %s -o OUTPUT_FILE INPUT_FILE" % program_name, file=sys.stderr)
    sys.stderr.write("Usage: %s -o OUTPUT_FILE INPUT_FILE\n" % program_name)
    # sys.exit rather than the bare exit(): the latter is a helper installed
    # by the site module and is not guaranteed to exist.
    sys.exit(1)

output_path = sys.argv[2]
input_path = sys.argv[3]

with open(input_path) as html_entity_names_file:
    # csv.reader yields [] for blank lines; drop those so that a stray empty
    # line in the input does not break the sort below.
    entries = [row for row in csv.reader(html_entity_names_file) if row]

# Plain string ordering puts every uppercase-initial name before every
# lowercase-initial one, which the two offset tables below rely on.
entries.sort(key=lambda entry: entry[ENTITY])
entity_count = len(entries)

# index maps a first letter to the offset of the first entry that starts with
# it. setdefault only stores a letter the first time it is seen; a truthiness
# test on the stored value would treat offset 0 as "missing" and overwrite it
# with the offset of the second entry.
index = {}
for offset, entry in enumerate(entries):
    index.setdefault(entry[ENTITY][0], offset)

# Both offset tables are indexed by every ASCII letter, so check up front
# instead of failing with a KeyError after the output is half written.
missing_letters = [c for c in string.ascii_letters if c not in index]
if missing_letters:
    sys.stderr.write("%s: no entity in %s starts with: %s\n" % (
        program_name, input_path, " ".join(missing_letters)))
    sys.exit(1)

with open(output_path, "w") as output_file:
    output_file.write("""/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

// THIS FILE IS GENERATED BY WebCore/html/parser/create-html-entity-table
// DO NOT EDIT (unless you are a ninja)!

#include "config.h"
#include "HTMLEntityTable.h"

namespace WebCore {

namespace {
""")

    # One UChar array per entity name, one declaration per line.
    # NOTE: convert_entity_to_cpp_name already appends "EntityName", so the
    # emitted identifiers end in "EntityNameEntityName". The table rows below
    # use the same spelling, so the generated code is self-consistent.
    for entry in entries:
        output_file.write("const UChar %sEntityName[] = %s;\n" % (
            convert_entity_to_cpp_name(entry[ENTITY]),
            convert_entity_to_uchar_array(entry[ENTITY])))

    output_file.write("""
HTMLEntityTableEntry staticEntityTable[%s] = {
""" % entity_count)

    # Table rows: { name array, name length, code point }.
    for entry in entries:
        output_file.write('    { %sEntityName, %s, %s },\n' % (
            convert_entity_to_cpp_name(entry[ENTITY]),
            len(entry[ENTITY]),
            convert_value_to_int(entry[VALUE])))

    output_file.write("""};

const HTMLEntityTableEntry* uppercaseOffset[] = {
""")
    for letter in string.ascii_uppercase:
        output_file.write(offset_table_entry(index[letter]) + "\n")
    # Sentinel row: lastEntryStartingWith('Z') reads the slot after 'Z', which
    # must point at the first lowercase entry.
    output_file.write(offset_table_entry(index['a']) + "\n")
    output_file.write("""};

const HTMLEntityTableEntry* lowercaseOffset[] = {
""")
    for letter in string.ascii_lowercase:
        output_file.write(offset_table_entry(index[letter]) + "\n")
    # Sentinel row: one past the end of the table, read by
    # lastEntryStartingWith('z').
    output_file.write(offset_table_entry(entity_count) + "\n")
    output_file.write("""};

}

const HTMLEntityTableEntry* HTMLEntityTable::firstEntryStartingWith(UChar c)
{
    if (c >= 'A' && c <= 'Z')
        return uppercaseOffset[c - 'A'];
    if (c >= 'a' && c <= 'z')
        return lowercaseOffset[c - 'a'];
    return 0;
}

const HTMLEntityTableEntry* HTMLEntityTable::lastEntryStartingWith(UChar c)
{
    if (c >= 'A' && c <= 'Z')
        return uppercaseOffset[c - 'A' + 1] - 1;
    if (c >= 'a' && c <= 'z')
        return lowercaseOffset[c - 'a' + 1] - 1;
    return 0;
}

const HTMLEntityTableEntry* HTMLEntityTable::firstEntry()
{
    return &staticEntityTable[0];
}

const HTMLEntityTableEntry* HTMLEntityTable::lastEntry()
{
    return &staticEntityTable[%s - 1];
}

}
""" % entity_count)
180