1/*
2 ***************************************************************************
3 * Copyright (C) 2008-2015, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 ***************************************************************************
6 *   file name:  uspoof_build.cpp
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2008 Dec 8
12 *   created by: Andy Heninger
13 *
14 *   Unicode Spoof Detection Data Builder
15 *   Builder-related functions are kept in separate files so that applications not needing
16 *   the builder can more easily exclude them, typically by means of static linking.
17 *
 *   There are three relatively independent sets of Spoof data:
 *      Confusables,
 *      Whole Script Confusables,
 *      ID character extensions.
 *
 *   The data tables for each are built separately, each from its own definitions.
24 */
25
26#include "unicode/utypes.h"
27#include "unicode/uspoof.h"
28#include "unicode/unorm.h"
29#include "unicode/uregex.h"
30#include "unicode/ustring.h"
31#include "cmemory.h"
32#include "uspoof_impl.h"
33#include "uhash.h"
34#include "uvector.h"
35#include "uassert.h"
36#include "uarrsort.h"
37#include "uspoof_conf.h"
38#include "uspoof_wsconf.h"
39
40#if !UCONFIG_NO_NORMALIZATION
41
42U_NAMESPACE_USE
43
44// Defined in uspoof.cpp, initializes file-static variables.
45U_CFUNC void uspoof_internalInitStatics(UErrorCode *status);
46
47// The main data building function
48
49U_CAPI USpoofChecker * U_EXPORT2
50uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
51                      const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
52                      int32_t *errorType, UParseError *pe, UErrorCode *status) {
53    uspoof_internalInitStatics(status);
54    if (U_FAILURE(*status)) {
55        return NULL;
56    }
57#if UCONFIG_NO_REGULAR_EXPRESSIONS
58    *status = U_UNSUPPORTED_ERROR;
59    return NULL;
60#else
61    if (errorType!=NULL) {
62        *errorType = 0;
63    }
64    if (pe != NULL) {
65        pe->line = 0;
66        pe->offset = 0;
67        pe->preContext[0] = 0;
68        pe->postContext[0] = 0;
69    }
70
71    // Set up a shell of a spoof detector, with empty data.
72    SpoofData *newSpoofData = new SpoofData(*status);
73    SpoofImpl *This = new SpoofImpl(newSpoofData, *status);
74
75    // Compile the binary data from the source (text) format.
76    ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status);
77    buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status);
78
79    if (U_FAILURE(*status)) {
80        delete This;
81        This = NULL;
82    }
83    return (USpoofChecker *)This;
84#endif // UCONFIG_NO_REGULAR_EXPRESSIONS
85}
86
87#endif
88