1b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/*
2b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru***************************************************************************
31b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2008-2015, International Business Machines Corporation
4b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* and others. All Rights Reserved.
5b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru***************************************************************************
6b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*   file name:  uspoof.h
7b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*   encoding:   US-ASCII
8b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*   tab size:   8 (not used)
9b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*   indentation:4
10b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*
11b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*   created on: 2008Feb13
12b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*   created by: Andy Heninger
13b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*
14b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*   Unicode Spoof Detection
15b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*/
16b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
17b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#ifndef USPOOF_H
18b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#define USPOOF_H
19b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
20b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/utypes.h"
21b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uset.h"
22b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/parseerr.h"
2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/localpointer.h"
24b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
25b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION
26b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
27b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_SHOW_CPLUSPLUS_API
29b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/unistr.h"
30b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uniset.h"
31b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif
32b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
33b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
3427f654740f2a26ad62a5c155af9199af9e69b889claireho/**
3527f654740f2a26ad62a5c155af9199af9e69b889claireho * \file
3627f654740f2a26ad62a5c155af9199af9e69b889claireho * \brief Unicode Security and Spoofing Detection, C API.
37b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
38b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * These functions are intended to check strings, typically
39b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * identifiers of some type, such as URLs, for the presence of
40b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * characters that are likely to be visually confusing -
41b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * for cases where the displayed form of an identifier may
42b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * not be what it appears to be.
43b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
44b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Unicode Technical Report #36, http://unicode.org/reports/tr36, and
45b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Unicode Technical Standard #39, http://unicode.org/reports/tr39
46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * "Unicode security considerations", give more background on
47b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * security and spoofing issues with Unicode identifiers.
48b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The tests and checks provided by this module implement the recommendations
4927f654740f2a26ad62a5c155af9199af9e69b889claireho * from those Unicode documents.
50b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
51b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The tests available on identifiers fall into two general categories:
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *   -#  Single identifier tests.  Check whether an identifier is
53b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       potentially confusable with any other string, or is suspicious
54b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       for other reasons.
55b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   -#  Two identifier tests.  Check whether two specific identifiers are confusable.
56b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       This does not consider whether either of the strings is potentially
57b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       confusable with any string other than the exact one specified.
58b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
59b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The steps to perform confusability testing are
60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   -#  Open a USpoofChecker.
61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   -#  Configure the USpoofChecker for the desired set of tests.  The tests that will
62b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       be performed are specified by a set of USpoofChecks flags.
63b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   -#  Perform the checks using the pre-configured USpoofChecker.  The results indicate
64b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       which (if any) of the selected tests have identified possible problems with the identifier.
65b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       Results are reported as a set of USpoofChecks flags;  this mirrors the form in which
6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *       the set of tests to perform was originally specified to the USpoofChecker.
67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * A USpoofChecker may be used repeatedly to perform checks on any number of identifiers.
69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Thread Safety: The test functions for checking a single identifier, or for testing
71b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * whether two identifiers are possibly confusable, are thread safe.
72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * They may be called concurrently, from multiple threads, using the same USpoofChecker instance.
73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * More generally, the standard ICU thread safety rules apply:  functions that take a
75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * const USpoofChecker parameter are thread safe.  Those that take a non-const
76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * USpoofChecker are not thread safe.
77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
78b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
79b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Descriptions of the available checks.
80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * When testing whether pairs of identifiers are confusable, with the uspoof_areConfusable()
82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * family of functions, the relevant tests are
83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *   -# USPOOF_SINGLE_SCRIPT_CONFUSABLE:  All of the characters from the two identifiers are
85b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *      from a single script, and the two identifiers are visually confusable.
86b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   -# USPOOF_MIXED_SCRIPT_CONFUSABLE:  At least one of the identifiers contains characters
87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *      from more than one script, and the two identifiers are visually confusable.
8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *   -# USPOOF_WHOLE_SCRIPT_CONFUSABLE: Each of the two identifiers is of a single script, but
8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *      the two identifiers are from different scripts, and they are visually confusable.
90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The safest approach is to enable all three of these checks as a group.
92b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * USPOOF_ANY_CASE is a modifier for the above tests.  If the identifiers being checked can
94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * be of mixed case and are used in a case-sensitive manner, this option should be specified.
95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If the identifiers being checked are used in a case-insensitive manner, and if they are
97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * displayed to users in lower-case form only, the USPOOF_ANY_CASE option should not be
98b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * specified.  Confusability issues involving upper case letters will not be reported.
99b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * When performing tests on a single identifier, with the uspoof_check() family of functions,
101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * the relevant tests are:
102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *    -# USPOOF_MIXED_SCRIPT_CONFUSABLE: the identifier contains characters from multiple
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *       scripts, and there exists an identifier of a single script that is visually confusable.
105b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *    -# USPOOF_WHOLE_SCRIPT_CONFUSABLE: the identifier consists of characters from a single
106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       script, and there exists a visually confusable identifier.
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *       The visually confusable identifier also consists of characters from a single script,
108b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       but not the same script as the identifier being checked.
109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *    -# USPOOF_ANY_CASE: modifies the mixed script and whole script confusables tests.  If
11027f654740f2a26ad62a5c155af9199af9e69b889claireho *       specified, the checks will consider confusable characters of any case.  If this flag is not
111b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       set, the test is performed assuming case folded identifiers.
112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *    -# USPOOF_SINGLE_SCRIPT: check that the identifier contains only characters from a
113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       single script.  (Characters from the 'common' and 'inherited' scripts are ignored.)
114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       This is not a test for confusable identifiers.
115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *    -# USPOOF_INVISIBLE: check an identifier for the presence of invisible characters,
116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       such as zero-width spaces, or character sequences that are
11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *       likely not to display, such as multiple occurrences of the same
118b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       non-spacing mark.  This check does not test the input string as a whole
119b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       for conformance to any particular syntax for identifiers.
120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *    -# USPOOF_CHAR_LIMIT: check that an identifier contains only characters from a specified set
121b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       of acceptable characters.  See uspoof_setAllowedChars() and
122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *       uspoof_setAllowedLocales().
123b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
124b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  Note on Scripts:
12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *     Characters from the Unicode Scripts "Common" and "Inherited" are ignored when considering
126b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *     the script of an identifier. Common characters include digits and symbols that
127b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *     are normally used with text from more than one script.
128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  Identifier Skeletons:  A skeleton is a transformation of an identifier, such that
13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  all identifiers that are confusable with each other have the same skeleton.
13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  Using skeletons, it is possible to build a dictionary data structure for
13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  a set of identifiers, and then quickly test whether a new identifier is
13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  confusable with an identifier already in the set.  The uspoof_getSkeleton()
13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  family of functions will produce the skeleton from an identifier.
13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  Note that skeletons are not guaranteed to be stable between versions
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  of Unicode or ICU, so applications should not rely on creating a permanent,
13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  or difficult to update, database of skeletons.  Instabilities result from
13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  identifying new pairs or sequences of characters that are visually
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  confusable, and thus must be mapped to the same skeleton character(s).
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
1421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *  Skeletons are computed using the algorithm and data described in Unicode UAX 39.
1431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *  The latest proposed update, UAX 39 Version 8 draft 1, says "the tables SL, SA, and ML
1441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *  were still problematic, and discouraged from use in [Unicode] 7.0.
1451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *  They were thus removed from version 8.0".
1461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *
1471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *  In light of this, the default mapping data included with ICU 55 uses the
1481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *  Unicode 7 MA (Multi script Any case) table data for the other type options
1491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *  (Single Script, Any Case), (Single Script, Lower Case) and (Multi Script, Lower Case).
150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
152b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustruct USpoofChecker;
153b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querutypedef struct USpoofChecker USpoofChecker; /**< C typedef so the forward-declared struct USpoofChecker can be named without the struct keyword */
154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Enum for the kinds of checks that USpoofChecker can perform.
157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * These enum values are used both to select the set of checks that
158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * will be performed, and to report results from the check function.
159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
162b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querutypedef enum USpoofChecks {
163b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    /**   Single script confusable test.
164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      *   When testing whether two identifiers are confusable, report that they are if
165b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      *   both are from the same script and they are visually confusable.
166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      *   Note: this test is not applicable to a check of a single identifier.
167b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      */
168b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    USPOOF_SINGLE_SCRIPT_CONFUSABLE =   1,
169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    /** Mixed script confusable test.
171b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     *  When checking a single identifier, report a problem if
172b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     *    the identifier contains multiple scripts, and
17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *    is confusable with some other identifier in a single script.
174b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     *  When testing whether two identifiers are confusable, report that they are if
17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *    the two IDs are visually confusable,
176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     *    and at least one contains characters from more than one script.
177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     */
178b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    USPOOF_MIXED_SCRIPT_CONFUSABLE  =   2,
179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    /** Whole script confusable test.
181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     *  When checking a single identifier, report a problem if
182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     *    the identifier is of a single script, and
183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     *    there exists a confusable identifier in another script.
18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *  When testing whether two identifiers are confusable, report that they are if
185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     *    each is of a single script,
186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     *    the scripts of the two identifiers are different, and
187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     *    the identifiers are visually confusable.
188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru     */
189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    USPOOF_WHOLE_SCRIPT_CONFUSABLE  =   4,
190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    /** Any Case Modifier for confusable identifier tests.
192b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        If specified, consider all characters, of any case, when looking for confusables.
193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        If USPOOF_ANY_CASE is not specified, identifiers being checked are assumed to have been
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case folded.  Upper case confusable characters will not be checked.
195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        Selects between Lower Case Confusable and
196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        Any Case Confusable.   */
197b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    USPOOF_ANY_CASE                 =   8,
198b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
1998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    /**
2008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * Check that an identifier is no looser than the specified RestrictionLevel.
2018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * The default if uspoof_setRestrictionLevel() is not called is HIGHLY_RESTRICTIVE.
2028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      *
2038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * If USPOOF_AUX_INFO is enabled the actual restriction level of the
2048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * identifier being tested will also be returned by uspoof_check().
2058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      *
2068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * @see URestrictionLevel
2078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * @see uspoof_setRestrictionLevel
2088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * @see USPOOF_AUX_INFO
2098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      *
2108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * @stable ICU 51
2118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      */
2128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    USPOOF_RESTRICTION_LEVEL        = 16,
2138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#ifndef U_HIDE_DEPRECATED_API
21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /** Check that an identifier contains only characters from a
216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * single script (plus chars from the common and inherited scripts.)
217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * Applies to checks of a single identifier only.
2188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * @deprecated ICU 51  Use USPOOF_RESTRICTION_LEVEL instead (this constant has the same value).
219b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      */
2208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    USPOOF_SINGLE_SCRIPT            =  USPOOF_RESTRICTION_LEVEL,
2218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif  /* U_HIDE_DEPRECATED_API */
2228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /** Check an identifier for the presence of invisible characters,
224b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * such as zero-width spaces, or character sequences that are
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho      * likely not to display, such as multiple occurrences of the same
226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * non-spacing mark.  This check does not test the input string as a whole
227b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * for conformance to any particular syntax for identifiers.
228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      */
229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    USPOOF_INVISIBLE                =  32,
230b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    /** Check that an identifier contains only characters from a specified set
232b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * of acceptable characters.  See uspoof_setAllowedChars() and
233b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      * uspoof_setAllowedLocales().
234b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru      */
235b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    USPOOF_CHAR_LIMIT               =  64,
236b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
2378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius   /**
2388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     * Check that an identifier does not include decimal digits from
2398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     * more than one numbering system.
2408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     *
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @stable ICU 51
2428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     */
2438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    USPOOF_MIXED_NUMBERS            = 128,
2448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius   /**
2468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     * Enable all spoof checks.  This mask (0xFFFF) does not include USPOOF_AUX_INFO.
2478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     *
2488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     * @stable ICU 4.6
2498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     */
2508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    USPOOF_ALL_CHECKS               = 0xFFFF,
2518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    /**
2538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * Enable the return of auxiliary (non-error) information in the
2548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * upper bits of the check results value.
2558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      *
2568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * If this "check" is not enabled, the results of uspoof_check() will be zero when an
2578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * identifier passes all of the enabled checks.
2588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      *
2598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * If this "check" is enabled, (uspoof_check() & USPOOF_ALL_CHECKS) will be zero
2608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      * when an identifier passes all checks.
2618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      *
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      * @stable ICU 51
2638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius      */
2648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    USPOOF_AUX_INFO                  = 0x40000000
2658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
266b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    } USpoofChecks;
267b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
268b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
2698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    /**
2708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     * Constants from UAX #39 for use in uspoof_setRestrictionLevel(), and
2718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     * for returned identifier restriction levels in check results.
272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @stable ICU 51
2738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     */
2748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    typedef enum URestrictionLevel {
2758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        /**
2768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         * Only ASCII characters: U+0000..U+007F
2778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         *
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * @stable ICU 51
2798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         */
2808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        USPOOF_ASCII = 0x10000000,
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /**
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          * All characters in each identifier must be from a single script.
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          *
2841b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert          * @stable ICU 53
285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          */
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,
2878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        /**
2888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         * All characters in each identifier must be from a single script, or from the combinations: Latin + Han +
2898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         * Hiragana + Katakana; Latin + Han + Bopomofo; or Latin + Han + Hangul. Note that this level will satisfy the
2908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         * vast majority of Latin-script users; also that TR36 has ASCII instead of Latin.
2918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         *
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * @stable ICU 51
2938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         */
294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,
2958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        /**
2968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         * Allow Latin with other scripts except Cyrillic, Greek, Cherokee. Otherwise, the same as Highly Restrictive.
2978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         *
298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * @stable ICU 51
2998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         */
300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,
3018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        /**
3028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         * Allow arbitrary mixtures of scripts. Otherwise, the same as Moderately Restrictive.
3038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         *
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * @stable ICU 51
3058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         */
306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,
3078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        /**
3088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         * Any valid identifiers, including characters outside of the Identifier Profile.
3098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         *
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * @stable ICU 51
3118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         */
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        USPOOF_UNRESTRICTIVE = 0x60000000,
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /**
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          * Mask for selecting the Restriction Level bits from the return value of uspoof_check().
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          *
3161b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert          * @stable ICU 53
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          */
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } URestrictionLevel;
3208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
321b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
322b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  Create a Unicode Spoof Checker, configured to perform all
323b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.
324b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  Note that additional checks may be added in the future,
325b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  resulting in changes to the default checking behavior.
326b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
327b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  @param status  The error code, set if this function encounters a problem.
328b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  @return        The newly created Spoof Checker; release it with uspoof_close().
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *  @stable ICU 4.2
330b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
33150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE USpoofChecker * U_EXPORT2
332b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_open(UErrorCode *status);
333b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
334b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
335b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
336f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
337b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Inverse of uspoof_serialize().
33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The memory containing the serialized data must remain valid and unchanged
339b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * as long as the spoof checker, or any cloned copies of the spoof checker,
340b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * are in use.  Ownership of the memory remains with the caller.
341b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The spoof checker (and any clones) must be closed prior to deleting the
342b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * serialized data.
343b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
344b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param data a pointer to 32-bit-aligned memory containing the serialized form of spoof data
345b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param length the number of bytes available at data;
346b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *               can be more than necessary
347b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param pActualLength receives the actual number of bytes at data taken up by the data;
348b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                      can be NULL
349b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param pErrorCode ICU error code
350b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return the spoof checker; close it (and any clones) before the serialized data is deleted.
351b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
352b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @see uspoof_open
353b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @see uspoof_serialize
35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
355b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
35627f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE USpoofChecker * U_EXPORT2
357b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
358b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                          UErrorCode *pErrorCode);
359b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
360b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
361b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * Open a Spoof Checker from the source form of the spoof data.
362f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  * The two inputs correspond to the Unicode data files confusables.txt
363f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  * and confusablesWholeScript.txt as described in Unicode UTS #39.
364f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  * The syntax of the source data is as described in UTS #39 for
365b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * these files, and the content of these files is acceptable input.
366b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
367b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * The character encoding of the (char *) input text is UTF-8.
368b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
369b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param confusables a pointer to the confusable characters definitions,
370b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    as found in file confusables.txt from unicode.org.
371b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param confusablesLen The length of the confusables text, or -1 if the
372b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    input string is zero-terminated.
373b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param confusablesWholeScript
374b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    a pointer to the whole script confusables definitions,
37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  *                    as found in the file confusablesWholeScript.txt from unicode.org.
376b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param confusablesWholeScriptLen The length of the whole script confusables text, or
377b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    -1 if the input string is zero-terminated.
378b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param errType     In the event of an error in the input, indicates
379b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    which of the input files contains the error.
380b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or
381b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    USPOOF_WHOLE_SCRIPT_CONFUSABLE, or
382b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    zero if no errors are found.
383b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param pe          In the event of an error in the input, receives the position
384b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    in the input text (line, offset) of the error.
385b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param status      an in/out ICU UErrorCode.  Among the possible errors is
386b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    U_PARSE_ERROR, which is used to report syntax errors
387b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                    in the input.
388b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @return            A spoof checker that uses the rules from the input files.
38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  * @stable ICU 4.2
390b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  */
39127f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE USpoofChecker * U_EXPORT2
392b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
393b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                      const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
394b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                      int32_t *errType, UParseError *pe, UErrorCode *status);
395b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
396b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
397b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
398b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * Close a Spoof Checker, freeing any memory that was being held by its implementation.
399b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param sc   The USpoofChecker to close.
40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  * @stable ICU 4.2
401b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  */
40250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE void U_EXPORT2
403b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_close(USpoofChecker *sc);
404b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_SHOW_CPLUSPLUS_API
40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
40750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN
40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \class LocalUSpoofCheckerPointer
41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * "Smart pointer" class, closes a USpoofChecker via uspoof_close().
41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For most methods see the LocalPointerBase base class.
41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Only declared when U_SHOW_CPLUSPLUS_API is enabled.
41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @see LocalPointerBase
41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @see LocalPointer
41627f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
41850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckerPointer, USpoofChecker, uspoof_close);
41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
42050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
424b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
425b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Clone a Spoof Checker.  The clone will be set to perform the same checks
426b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   as the original source.
427b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
428b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc       The source USpoofChecker
429b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status   The error code, set if this function encounters a problem.
430b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return         The newly created Spoof Checker, a clone of sc.
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
432b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE USpoofChecker * U_EXPORT2
434b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_clone(const USpoofChecker *sc, UErrorCode *status);
435b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
436b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
437b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
438b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Specify the set of checks that will be performed by the check
439b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * functions of this Spoof Checker.
440b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
441b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc       The USpoofChecker
442b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param checks   The set of checks that this spoof checker will perform.
443b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 The value is a bit set, obtained by OR-ing together
444b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 values from enum USpoofChecks.
445b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status   The error code, set if this function encounters a problem.
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
447b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @see uspoof_getChecks
448b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE void U_EXPORT2
450b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
451b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
452b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
453b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Get the set of checks that this Spoof Checker has been configured to perform.
454b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
455b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc       The USpoofChecker
456b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status   The error code, set if this function encounters a problem.
457b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return         The set of checks that this spoof checker will perform.
458b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 The value is a bit set, obtained by OR-ing together
459b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 values from enum USpoofChecks.
46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
461b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @see uspoof_setChecks
462b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
46350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE int32_t U_EXPORT2
464b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
465b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
4668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/**
4678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * Set the loosest restriction level allowed. The default if this function is not called
4688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * is USPOOF_HIGHLY_RESTRICTIVE. Calling this function also enables the RESTRICTION_LEVEL check.
4698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param sc               The USpoofChecker
4708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param restrictionLevel The loosest restriction level allowed.
4718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @see URestrictionLevel
472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  * @stable ICU 51
4738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  */
474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_STABLE void U_EXPORT2
4758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliususpoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
4768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/**
4798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * Get the Restriction Level that will be tested if the checks include RESTRICTION_LEVEL.
4808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param sc The USpoofChecker
4818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @return The restriction level
4828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @see URestrictionLevel
483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  * @stable ICU 51
4848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  */
485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_STABLE URestrictionLevel U_EXPORT2
4868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliususpoof_getRestrictionLevel(const USpoofChecker *sc);
4878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
488b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
489b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Limit characters that are acceptable in identifiers being checked to those
490b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * normally used with the languages associated with the specified locales.
491b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Any previously specified list of locales is replaced by the new settings.
492b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
493b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * A set of languages is determined from the locale(s), and
494b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * from those a set of acceptable Unicode scripts is determined.
495b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Characters from this set of scripts, along with characters from
496b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * the "common" and "inherited" Unicode Script categories,
497b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * will be permitted.
498b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
499b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Supplying an empty string removes all restrictions;
500b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * characters from any script will be allowed.
501b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
502b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The USPOOF_CHAR_LIMIT test is automatically enabled for this
503b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * USpoofChecker when calling this function with a non-empty list
504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * of locales.
505b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
506b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The Unicode Set of characters that will be allowed is accessible
507b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * via the uspoof_getAllowedChars() function.  uspoof_setAllowedLocales()
508b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * will <i>replace</i> any previously applied set of allowed characters.
509b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
510b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Adjustments, such as additions or deletions of certain classes of characters,
511b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * can be made to the result of uspoof_setAllowedLocales() by
512b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * fetching the resulting set with uspoof_getAllowedChars(),
513b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * manipulating a copy of it with the Unicode Set API (the returned set is frozen),
514b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * then resetting the limits of the spoof checker with uspoof_setAllowedChars().
515b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
516b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc           The USpoofChecker
517b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param localesList  A list of locales, from which the language
518b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                     and associated script are extracted.  The locales
519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                     are comma-separated if there is more than one.
520b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                     White space may not appear within an individual locale,
521b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                     but is ignored otherwise.
522b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                     The locales are syntactically like those from the
523b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                     HTTP Accept-Language header.
524b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                     If the localesList is empty, no restrictions will be placed on
525b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                     the allowed characters.
526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
527b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status       The error code, set if this function encounters a problem.
52850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
529b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
53050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE void U_EXPORT2
531b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
532b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
533b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
534b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Get a list of locales for the scripts that are acceptable in strings
535b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  to be checked.  If no limitations on scripts have been specified,
536b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  an empty string will be returned.
537b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
538b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
539b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
540b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  The format of the returned list is the same as that supplied to
541b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  uspoof_setAllowedLocales(), but the returned list may not be identical
542b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  to the originally specified string; the string may be reformatted,
543b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  and information other than languages from
544b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *  the originally specified locales may be omitted.
545b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
546b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc           The USpoofChecker
547b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status       The error code, set if this function encounters a problem.
548b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return             A string containing a list of locales corresponding
549b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                     to the acceptable scripts, formatted like an
550b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                     HTTP Accept-Language value.
551b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
55450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE const char * U_EXPORT2
555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
556b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
559b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Limit the acceptable characters to those specified by a Unicode Set.
560b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   Any previously specified character limit
561b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   is replaced by the new settings.  This includes limits on
562b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   characters that were set with the uspoof_setAllowedLocales() function.
563b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
564b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The USPOOF_CHAR_LIMIT test is automatically enabled for this
565b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * USpoofChecker by this function.
566b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
567b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc       The USpoofChecker
568b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param chars    A Unicode Set containing the list of
56950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                 characters that are permitted.  Ownership of the set
570b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 remains with the caller.  The incoming set is cloned by
571b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 this function, so there are no restrictions on modifying
572b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 or deleting the USet after calling this function.
573b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status   The error code, set if this function encounters a problem.
57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
575b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
57650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE void U_EXPORT2
577b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);
578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
579b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
580b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Get a USet for the characters permitted in an identifier.
582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * This corresponds to the limits imposed by the Set Allowed Characters
583b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * functions, such as uspoof_setAllowedChars(). Limitations imposed by other
584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * checks will not be reflected in the set returned by this function.
585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The returned set will be frozen, meaning that it cannot be modified
587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * by the caller.
588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Ownership of the returned set remains with the Spoof Checker.  The
590b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * returned set will become invalid if the spoof checker is closed,
591b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * or if a new set of allowed characters is specified.
592b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
593b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
594b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc       The USpoofChecker
595b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status   The error code, set if this function encounters a problem.
596b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return         A USet containing the characters that are permitted by
597b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 the USPOOF_CHAR_LIMIT test.
59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
599b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
60050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE const USet * U_EXPORT2
601b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
602b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
603b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_SHOW_CPLUSPLUS_API
605b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
606b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Limit the acceptable characters to those specified by a Unicode Set.
607b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   Any previously specified character limit
608b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   is replaced by the new settings.  This includes limits on
609b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   characters that were set with the uspoof_setAllowedLocales() function.
610b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
611b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The USPOOF_CHAR_LIMIT test is automatically enabled for this
612b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * USpoofChecker by this function.
613b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
614b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc       The USpoofChecker
615b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param chars    A Unicode Set containing the list of
61650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                 characters that are permitted.  Ownership of the set
617b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 remains with the caller.  The incoming set is cloned by
618b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 this function, so there are no restrictions on modifying
6198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                 or deleting the UnicodeSet after calling this function.
620b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status   The error code, set if this function encounters a problem.
62150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
622b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
62350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE void U_EXPORT2
624103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliususpoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status);
625b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
626b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
627b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
628b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Get a UnicodeSet for the characters permitted in an identifier.
629b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * This corresponds to the limits imposed by the Set Allowed Characters /
630b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * UnicodeSet functions, such as uspoof_setAllowedUnicodeSet(). Limitations imposed
631b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * by other checks will not be reflected in the set returned by this function.
632b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
633b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The returned set will be frozen, meaning that it cannot be modified
634b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * by the caller.
635b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
636b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Ownership of the returned set remains with the Spoof Checker.  The
637b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * returned set will become invalid if the spoof checker is closed,
638b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * or if a new set of allowed characters is specified.
639b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
640b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
641b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc       The USpoofChecker
642b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status   The error code, set if this function encounters a problem.
643b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return         A UnicodeSet containing the characters that are permitted by
644b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 the USPOOF_CHAR_LIMIT test.
64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
646b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
647103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusU_STABLE const icu::UnicodeSet * U_EXPORT2
648b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
649b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif
650b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
651b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
652b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
653b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Check the specified string for possible security issues.
65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The text to be checked will typically be an identifier of some sort.
655b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The set of checks to be performed is specified with uspoof_setChecks().
656b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
657b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc      The USpoofChecker
6588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param id      The identifier to be checked for possible security issues,
659b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                in UTF-16 format.
660b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param length  The length of the string to be checked, expressed in
661b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                16-bit UTF-16 code units, or -1 if the string is
662b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                NUL-terminated.
6638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param position  An out parameter.
6648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                Originally, the index of the first string position that failed a check.
6658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                Now, always returns zero.
6668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                This parameter may be NULL.
66750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status  The error code, set if an error occurred while attempting to
668b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                perform the check.
669b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                Spoofing or security issues detected with the input string are
670b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                not reported here, but through the function's return value.
671b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return        An integer value with bits set for any potential security
672b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                or spoofing issues detected.  The bits are defined by
6738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
6748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                will be zero if the input string passes all of the
6758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                enabled checks.
67650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
677b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
67850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE int32_t U_EXPORT2
679b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_check(const USpoofChecker *sc,
6808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                         const UChar *id, int32_t length,
681b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                         int32_t *position,
682b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                         UErrorCode *status);
683b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
684b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
685b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
686b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Check the specified string for possible security issues.
68750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The text to be checked will typically be an identifier of some sort.
688b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The set of checks to be performed is specified with uspoof_setChecks().
689b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
690b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc      The USpoofChecker
6918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param id      An identifier to be checked for possible security issues, in UTF-8 format.
692b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param length  The length of the string to be checked, or -1 if the string is
693b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                NUL-terminated.
6948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param position  An out parameter.
6958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                Originally, the index of the first string position that failed a check.
6968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                Now, always returns zero.
6978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                This parameter may be NULL.
6988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                Use of this parameter is deprecated as of ICU 51.
69950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status  The error code, set if an error occurred while attempting to
700b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                perform the check.
701b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                Spoofing or security issues detected with the input string are
702b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                not reported here, but through the function's return value.
703b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                If the input contains invalid UTF-8 sequences,
704b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                a status of U_INVALID_CHAR_FOUND will be returned.
705b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return        An integer value with bits set for any potential security
706b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                or spoofing issues detected.  The bits are defined by
7078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
7088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                will be zero if the input string passes all of the
7098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                enabled checks.
71050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
711b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
71250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE int32_t U_EXPORT2
713b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_checkUTF8(const USpoofChecker *sc,
7148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                 const char *id, int32_t length,
715b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                 int32_t *position,
716b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                 UErrorCode *status);
717b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
718b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
71950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_SHOW_CPLUSPLUS_API
720b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
721b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Check the specified string for possible security issues.
72250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The text to be checked will typically be an identifier of some sort.
723b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The set of checks to be performed is specified with uspoof_setChecks().
724b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
725b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc      The USpoofChecker
7268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param id      An identifier to be checked for possible security issues.
7278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param position  An out parameter.
7288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                Originally, the index of the first string position that failed a check.
7298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                Now, always returns zero.
7308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                This parameter may be NULL.
7318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                Use of this parameter is deprecated as of ICU 51.
73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status  The error code, set if an error occurred while attempting to
733b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                perform the check.
734b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                Spoofing or security issues detected with the input string are
735b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                not reported here, but through the function's return value.
736b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return        An integer value with bits set for any potential security
737b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                or spoofing issues detected.  The bits are defined by
7388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
7398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                will be zero if the input string passes all of the
7408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                enabled checks.
74150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
742b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
74350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE int32_t U_EXPORT2
744b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_checkUnicodeString(const USpoofChecker *sc,
7458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                          const icu::UnicodeString &id,
746b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                          int32_t *position,
747b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                          UErrorCode *status);
748b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
749b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif
750b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
751b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
752b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
753b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Check whether two specified strings are visually confusable.
754b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The types of confusability to be tested - single script, mixed script,
755b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * or whole script - are determined by the check options set for the
756b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * USpoofChecker.
757b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
758b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The tests to be performed are controlled by the flags
759b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   USPOOF_SINGLE_SCRIPT_CONFUSABLE
760b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   USPOOF_MIXED_SCRIPT_CONFUSABLE
761b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   USPOOF_WHOLE_SCRIPT_CONFUSABLE
762b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * At least one of these tests must be selected.
763b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
764b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * USPOOF_ANY_CASE is a modifier for the tests.  Select it if the identifiers
765b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *   may be of mixed case.
766b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * If identifiers are case folded for comparison and
767b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * display to the user, do not select the USPOOF_ANY_CASE option.
768b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
769b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
770b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc      The USpoofChecker
7718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param id1     The first of the two identifiers to be compared for
772b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                confusability.  The identifiers are in UTF-16 format.
7738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param length1 The length of the first identifier, expressed in
774b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                16-bit UTF-16 code units, or -1 if the string is
7758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                NUL-terminated.
7768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param id2     The second of the two identifiers to be compared for
7778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                confusability.  The identifiers are in UTF-16 format.
7788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param length2 The length of the second identifier, expressed in
779b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                16-bit UTF-16 code units, or -1 if the string is
7808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                NUL-terminated.
78150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status  The error code, set if an error occurred while attempting to
782b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                perform the check.
7838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                Confusability of the identifiers is not reported here,
784b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                but through this function's return value.
785b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return        An integer value with bit(s) set corresponding to
786b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                the type of confusability found, as defined by
7878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                enum USpoofChecks.  Zero is returned if the identifiers
788b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                are not confusable.
78950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
790b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
79150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE int32_t U_EXPORT2
792b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_areConfusable(const USpoofChecker *sc,
7938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                     const UChar *id1, int32_t length1,
7948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                     const UChar *id2, int32_t length2,
795b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                     UErrorCode *status);
796b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
797b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
798b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
799b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
800b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Check whether two specified strings are visually confusable.
801b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The types of confusability to be tested - single script, mixed script,
802b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * or whole script - are determined by the check options set for the
803b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * USpoofChecker.
804b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
805b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc      The USpoofChecker
8068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param id1     The first of the two identifiers to be compared for
8078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                confusability.  The strings are in UTF-8 format.
8088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param length1 The length of the first identifier, in bytes, or -1
8098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                if the string is NUL-terminated.
8108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @param id2     The second of the two identifiers to be compared for
811b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                confusability.  The strings are in UTF-8 format.
812b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param length2 The length of the second identifier, in bytes, or -1
8138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                if the string is NUL-terminated.
81450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status  The error code, set if an error occurred while attempting to
815b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                perform the check.
816b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                Confusability of the strings is not reported here,
817b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                but through this function's return value.
818b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return        An integer value with bit(s) set corresponding to
819b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                the type of confusability found, as defined by
820b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                enum USpoofChecks.  Zero is returned if the strings
821b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                are not confusable.
82250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
823b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
82450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE int32_t U_EXPORT2
825b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_areConfusableUTF8(const USpoofChecker *sc,
8268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                         const char *id1, int32_t length1,
8278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                         const char *id2, int32_t length2,
828b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                         UErrorCode *status);
829b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
830b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
831b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
832b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
83350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_SHOW_CPLUSPLUS_API
834b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
835b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Check whether two specified strings are visually confusable.
836b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The types of confusability to be tested - single script, mixed script,
837b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * or whole script - are determined by the check options set for the
838b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * USpoofChecker.
839b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
840b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc      The USpoofChecker
84159d709d503bab6e2b61931737e662dd293b40578ccornelius * @param s1      The first of the two identifiers to be compared for
8428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                confusability, supplied as a UnicodeString.
84359d709d503bab6e2b61931737e662dd293b40578ccornelius * @param s2      The second of the two identifiers to be compared for
844b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                confusability, supplied as a UnicodeString.
84550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param status  The error code, set if an error occurred while attempting to
846b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                perform the check.
8478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                Confusability of the identifiers is not reported here,
848b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                but through this function's return value.
849b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return        An integer value with bit(s) set corresponding to
850b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                the type of confusability found, as defined by
8518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *                enum USpoofChecks.  Zero is returned if the identifiers
852b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                are not confusable.
85350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
854b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
85550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE int32_t U_EXPORT2
856b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_areConfusableUnicodeString(const USpoofChecker *sc,
857103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                                  const icu::UnicodeString &s1,
858103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                                  const icu::UnicodeString &s2,
859b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                  UErrorCode *status);
860b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif
861b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
862b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
863b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
8648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  Get the "skeleton" for an identifier.
8658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  Skeletons are a transformation of the input identifier;
8668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  two identifiers are confusable if their skeletons are identical.
8678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  See Unicode Technical Standard #39 for additional information.
868b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
869b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  Using skeletons directly makes it possible to quickly check
870b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  whether an identifier is confusable with any of some large
871b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  set of existing identifiers, by creating an efficiently
872b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  searchable collection of the skeletons.
873b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
874b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param sc      The USpoofChecker
875b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param type    The type of skeleton, corresponding to which
876b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                of the Unicode confusable data tables to use.
877b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                The default is Mixed-Script, Lowercase.
878b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
8791b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  *                USPOOF_ANY_CASE.  The two flags may be ORed.
8808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param id      The input identifier whose skeleton will be computed.
8818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param length  The length of the input identifier, expressed in 16-bit
882b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                UTF-16 code units, or -1 if the string is NUL-terminated.
883b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param dest    The output buffer, to receive the skeleton string.
884b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param destCapacity  The length of the output buffer, in 16-bit units.
885b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                The destCapacity may be zero, in which case the function will
886b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                return the actual length of the skeleton.
88750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  * @param status  The error code, set if an error occurred while attempting to
888b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                perform the check.
889b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @return        The length of the skeleton string.  The returned length
890b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                is always that of the complete skeleton, even when the
891b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                supplied buffer is too small (or of zero length).
892b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
89350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  * @stable ICU 4.2
894b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  */
89550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE int32_t U_EXPORT2
896b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getSkeleton(const USpoofChecker *sc,
897b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                   uint32_t type,
8988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                   const UChar *id,  int32_t length,
899b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                   UChar *dest, int32_t destCapacity,
900b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                   UErrorCode *status);
901b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
902b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
9038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  Get the "skeleton" for an identifier.
9048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  Skeletons are a transformation of the input identifier;
9058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  Two identifiers are confusable if their skeletons are identical.
9068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  See Unicode UAX #39 for additional information.
907b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
908b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  Using skeletons directly makes it possible to quickly check
909b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  whether an identifier is confusable with any of some large
910b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  set of existing identifiers, by creating an efficiently
911b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  searchable collection of the skeletons.
912b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
913b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param sc      The USpoofChecker
914b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param type    The type of skeleton, corresponding to which
915b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                of the Unicode confusable data tables to use.
916b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                The default is Mixed-Script, Lowercase.
917b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
918b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                USPOOF_ANY_CASE.  The two flags may be ORed.
9198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param id      The UTF-8 format identifier whose skeleton will be computed.
920b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param length  The length of the input string, in bytes,
921b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                or -1 if the string is zero terminated.
922b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param dest    The output buffer, to receive the skeleton string.
923b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param destCapacity  The length of the output buffer, in bytes.
924b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                The destCapacity may be zero, in which case the function will
925b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                return the actual length of the skeleton.
92650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  * @param status  The error code, set if an error occurred while attempting to
927b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                perform the check.  Possible Errors include U_INVALID_CHAR_FOUND
928b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                   for invalid UTF-8 sequences, and
929b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                   U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
930b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                   to hold the complete skeleton.
931b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @return        The length of the skeleton string, in bytes.  The returned length
932b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                is always that of the complete skeleton, even when the
933b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                supplied buffer is too small (or of zero length).
934b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
93550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  * @stable ICU 4.2
936b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  */
93750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_STABLE int32_t U_EXPORT2
938b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getSkeletonUTF8(const USpoofChecker *sc,
939b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                       uint32_t type,
9408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                       const char *id,  int32_t length,
941b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                       char *dest, int32_t destCapacity,
942b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                       UErrorCode *status);
943b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
94450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_SHOW_CPLUSPLUS_API
945b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
9468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  Get the "skeleton" for an identifier.
9478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  Skeletons are a transformation of the input identifier;
9488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  two identifiers are confusable if their skeletons are identical.
9498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *  See Unicode UAX #39 for additional information.
950b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
951b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  Using skeletons directly makes it possible to quickly check
952b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  whether an identifier is confusable with any of some large
953b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  set of existing identifiers, by creating an efficiently
954b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *  searchable collection of the skeletons.
955b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
956b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param sc      The USpoofChecker.
957b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @param type    The type of skeleton, corresponding to which
958b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                of the Unicode confusable data tables to use.
959b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                The default is Mixed-Script, Lowercase.
960b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
9611b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  *                USPOOF_ANY_CASE.  The two flags may be ORed.
9628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param id      The input identifier whose skeleton will be computed.
9638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param dest    The output string, which receives the skeleton.
96450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  * @param status  The error code, set if an error occurred while attempting to
965b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *                perform the check.
966b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  * @return        A reference to the destination (skeleton) string.
967b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  *
96850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  * @stable ICU 4.2
969b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru  */
97054dcd9b6a06071f647dac967e9e267abb9410720Craig CorneliusU_I18N_API icu::UnicodeString & U_EXPORT2
971b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
972b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                uint32_t type,
9738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                                const icu::UnicodeString &id,
974103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                                icu::UnicodeString &dest,
975b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                                UErrorCode *status);
97650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif   /* U_SHOW_CPLUSPLUS_API */
977b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
9788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/**
9798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
9808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
9818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
9828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
9838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * be deleted by the caller.
9848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
9858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param status The error code, set if a problem occurs while creating the set.
  * @return The frozen set of candidate characters, as a USet. It is owned by the
  *         ICU library and must not be deleted by the caller.
9868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
987f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  * @stable ICU 51
9888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  */
989f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_STABLE const USet * U_EXPORT2
9908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliususpoof_getInclusionSet(UErrorCode *status);
9918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
9928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/**
9938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
9948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Recommended_Scripts
9958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
9968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
9978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * be deleted by the caller.
9988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
9998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param status The error code, set if a problem occurs while creating the set.
  * @return The frozen set of characters from the recommended scripts, as a USet.
  *         It is owned by the ICU library and must not be deleted by the caller.
10008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
1001f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  * @stable ICU 51
10028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  */
1003f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_STABLE const USet * U_EXPORT2
10048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliususpoof_getRecommendedSet(UErrorCode *status);
10058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
10068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#if U_SHOW_CPLUSPLUS_API
10078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
10088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/**
10098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
10108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
10118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
10128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
10138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * be deleted by the caller.
10148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
10158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param status The error code, set if a problem occurs while creating the set.
  * @return The frozen set of candidate characters, as an icu::UnicodeSet. It is owned
  *         by the ICU library and must not be deleted by the caller.
10168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
1017f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  * @stable ICU 51
10188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  */
1019f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_STABLE const icu::UnicodeSet * U_EXPORT2
10208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliususpoof_getInclusionUnicodeSet(UErrorCode *status);
10218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
10228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/**
10238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
10248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Recommended_Scripts
10258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
10268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
10278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * be deleted by the caller.
10288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
10298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  * @param status The error code, set if a problem occurs while creating the set.
  * @return The frozen set of characters from the recommended scripts, as an
  *         icu::UnicodeSet. It is owned by the ICU library and must not be deleted
  *         by the caller.
10308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  *
1031f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  * @stable ICU 51
10328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius  */
1033f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_STABLE const icu::UnicodeSet * U_EXPORT2
10348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliususpoof_getRecommendedUnicodeSet(UErrorCode *status);
10358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
10368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif /* U_SHOW_CPLUSPLUS_API */
10378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1038b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/**
1039b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * Serialize the data for a spoof detector into a chunk of memory.
1040b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * The flattened spoof detection tables can later be used to efficiently
1041b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * instantiate a new Spoof Detector.
1042b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
10438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * The serialized spoof checker includes only the data compiled from the
10448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * Unicode data tables by uspoof_openFromSource(); it does not include
10458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * any other state or configuration that may have been set.
10468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *
1047b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param sc   the Spoof Detector whose data is to be serialized.
1048b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param data a pointer to 32-bit-aligned memory to be filled with the data,
1049b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *             can be NULL if capacity==0
1050b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param capacity the number of bytes available at data,
1051b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *                 or 0 for preflighting
1052b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @param status an in/out ICU UErrorCode; possible errors include:
1053b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
1054b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * - U_ILLEGAL_ARGUMENT_ERROR if the data or capacity parameters are bad
1055b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @return the number of bytes written or needed for the spoof data
1056b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *
1057b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * @see uspoof_openFromSerialized()
105850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 4.2
1059b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */
106027f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE int32_t U_EXPORT2
1061b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_serialize(USpoofChecker *sc,
1062b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                 void *data, int32_t capacity,
1063b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                 UErrorCode *status);
1064b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
1065b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
1066b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif
1067b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
1068b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif   /* USPOOF_H */
1069