1/*
2******************************************************************************
3*
4*   Copyright (C) 2008-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  uspoof_conf.h
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2009Jan05
14*   created by: Andy Heninger
15*
16*   Internal classes for compiling confusable data into its binary (runtime) form.
17*/
18
19#ifndef __USPOOF_BUILDCONF_H__
20#define __USPOOF_BUILDCONF_H__
21
22#if !UCONFIG_NO_NORMALIZATION
23
24#if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26#include "uspoof_impl.h"
27
28U_NAMESPACE_BEGIN
29
30// SPUString
31//              Holds a string that is the result of one of the mappings defined
32//              by the confusable mapping data (confusables.txt from Unicode.org)
33//              Instances of SPUString exist during the compilation process only.
34
35struct SPUString : public UMemory {
36    UnicodeString  *fStr;             // The actual string.
37    int32_t         fStrTableIndex;   // Index into the final runtime data for this string.
38                                      //  (or, for length 1, the single string char itself,
39                                      //   there being no string table entry for it.)
40    SPUString(UnicodeString *s);
41    ~SPUString();
42};
43
44
45//  String Pool   A utility class for holding the strings that are the result of
//                the spoof mappings.  These strings will ultimately end up in the
47//                run-time String Table.
48//                This is sort of like a sorted set of strings, except that ICU's anemic
49//                built-in collections don't support those, so it is implemented with a
50//                combination of a uhash and a UVector.
51
52
53class SPUStringPool : public UMemory {
54  public:
55    SPUStringPool(UErrorCode &status);
56    ~SPUStringPool();
57
58    // Add a string. Return the string from the table.
59    // If the input parameter string is already in the table, delete the
60    //  input parameter and return the existing string.
61    SPUString *addString(UnicodeString *src, UErrorCode &status);
62
63
64    // Get the n-th string in the collection.
65    SPUString *getByIndex(int32_t i);
66
67    // Sort the contents; affects the ordering of getByIndex().
68    void sort(UErrorCode &status);
69
70    int32_t size();
71
72  private:
73    UVector     *fVec;    // Elements are SPUString *
74    UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString
75};
76
77
78// class ConfusabledataBuilder
79//     An instance of this class exists while the confusable data is being built from source.
80//     It encapsulates the intermediate data structures that are used for building.
81//     It exports one static function, to do a confusable data build.
82
83class ConfusabledataBuilder : public UMemory {
84  private:
85    SpoofImpl  *fSpoofImpl;
86    UChar      *fInput;
87    UHashtable *fSLTable;
88    UHashtable *fSATable;
89    UHashtable *fMLTable;
90    UHashtable *fMATable;
91    UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.
92
93    // The binary data is first assembled into the following four collections, then
94    //   copied to its final raw-memory destination.
95    UVector            *fKeyVec;
96    UVector            *fValueVec;
97    UnicodeString      *fStringTable;
98    UVector            *fStringLengthsTable;
99
100    SPUStringPool      *stringPool;
101    URegularExpression *fParseLine;
102    URegularExpression *fParseHexNum;
103    int32_t             fLineNum;
104
105    ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
106    ~ConfusabledataBuilder();
107    void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
108
109    // Add an entry to the key and value tables being built
110    //   input:  data from SLTable, MATable, etc.
111    //   outut:  entry added to fKeyVec and fValueVec
112    void addKeyEntry(UChar32     keyChar,     // The key character
113                     UHashtable *table,       // The table, one of SATable, MATable, etc.
114                     int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
115                     UErrorCode &status);
116
117    // From an index into fKeyVec & fValueVec
118    //   get a UnicodeString with the corresponding mapping.
119    UnicodeString getMapping(int32_t index);
120
121    // Populate the final binary output data array with the compiled data.
122    void outputData(UErrorCode &status);
123
124  public:
125    static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
126        int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
127};
128U_NAMESPACE_END
129
#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
#endif  // !UCONFIG_NO_NORMALIZATION
132#endif  // __USPOOF_BUILDCONF_H__
133