1/*
2**********************************************************************
3*   Copyright (C) 1999-2015, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   file name:  ustr_imp.h
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2001jan30
12*   created by: Markus W. Scherer
13*/
14
15#ifndef __USTR_IMP_H__
16#define __USTR_IMP_H__
17
18#include "unicode/utypes.h"
19#include "unicode/uiter.h"
20#include "ucase.h"
21
22/**
 * Simple declaration to avoid including unicode/ubrk.h.
 * Only the incomplete struct type is introduced, which is enough for the
 * pointer uses in this header. The UBRK_TYPEDEF_UBREAK_ITERATOR guard
 * keeps the typedef from being emitted a second time by any other header
 * that tests the same macro.
 */
23#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
24#   define UBRK_TYPEDEF_UBREAK_ITERATOR
25    typedef struct UBreakIterator UBreakIterator;
26#endif
27
28#ifndef U_COMPARE_IGNORE_CASE
29/* see also unorm.h */
30/**
31 * Option bit for unorm_compare:
32 * Perform case-insensitive comparison.
 * Defined here only if no earlier header has already defined it (see the
 * enclosing #ifndef). NOTE(review): presumably this must equal the value
 * in unorm.h -- keep the two in sync.
33 */
34#define U_COMPARE_IGNORE_CASE       0x10000
35#endif
36
37/**
38 * Internal option for unorm_cmpEquivFold() for strncmp style.
39 * If set, checks for both string length and terminating NUL.
 * The value 0x1000 is a single bit that does not collide with
 * U_COMPARE_IGNORE_CASE (0x10000), so both can be OR-ed into one
 * options word.
40 */
41#define _STRNCMP_STYLE 0x1000
42
43/**
44 * Compare two strings in code point order or code unit order.
45 * Works in strcmp style (both lengths -1),
46 * strncmp style (lengths equal and >=0, flag TRUE),
47 * and memcmp/UnicodeString style (at least one length >=0).
 *
 * @param s1             first string
 * @param length1        length of s1, or -1 (see the styles above)
 * @param s2             second string
 * @param length2        length of s2, or -1 (see the styles above)
 * @param strncmpStyle   the "flag" referred to above; TRUE selects strncmp style
 * @param codePointOrder selects between the two orderings named above;
 *                       presumably TRUE means code point order -- TODO confirm
 * @return comparison result as int32_t; the sign convention is not stated in
 *         this header (presumably <0, 0, >0 like strcmp -- TODO confirm)
48 */
49U_CFUNC int32_t U_EXPORT2
50uprv_strCompare(const UChar *s1, int32_t length1,
51                const UChar *s2, int32_t length2,
52                UBool strncmpStyle, UBool codePointOrder);
53
54/**
55 * Internal API, used by u_strcasecmp() etc.
56 * Compare strings case-insensitively,
57 * in code point order or code unit order.
 *
 * @param s1         first string
 * @param length1    length of s1; NOTE(review): presumably -1 means
 *                   NUL-terminated, as for the other functions here -- confirm
 * @param s2         second string
 * @param length2    length of s2 (same convention as length1)
 * @param options    bit set of compare options; which bits choose the
 *                   ordering is not shown in this header
 * @param pErrorCode ICU error code
 * @return comparison result as int32_t (sign convention not stated here)
58 */
59U_CFUNC int32_t
60u_strcmpFold(const UChar *s1, int32_t length1,
61             const UChar *s2, int32_t length2,
62             uint32_t options,
63             UErrorCode *pErrorCode);
64
65/**
66 * Internal API, used for detecting length of
67 * shared prefix case-insensitively.
 * Returns nothing directly; the two prefix lengths are delivered through
 * matchLen1 and matchLen2. They are separate outputs, one per input string.
68 * @param s1            input string 1
69 * @param length1       length of string 1, or -1 (NUL-terminated)
70 * @param s2            input string 2
71 * @param length2       length of string 2, or -1 (NUL-terminated)
72 * @param options       compare options
73 * @param matchLen1     (output) length of partial prefix match in s1
74 * @param matchLen2     (output) length of partial prefix match in s2
75 * @param pErrorCode    receives error status
76 */
77U_CAPI void
78u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
79                             const UChar *s2, int32_t length2,
80                             uint32_t options,
81                             int32_t *matchLen1, int32_t *matchLen2,
82                             UErrorCode *pErrorCode);
83
84/**
85 * Are the Unicode properties loaded?
86 * This must be used before internal functions are called that do
87 * not perform this check.
88 * Generate a debug assertion failure if data is not loaded.
 *
 * @param pErrorCode ICU error code
 * @return UBool answering the question above; presumably TRUE when the
 *         property data is available -- TODO confirm
89 */
90U_CFUNC UBool
91uprv_haveProperties(UErrorCode *pErrorCode);
92
93/**
94  * Load the Unicode property data.
95  * Intended primarily for use from u_init().
96  * Has no effect if property data is already loaded.
97  * NOT thread safe.
98  */
99/*U_CFUNC int8_t
100uprv_loadPropsData(UErrorCode *errorCode);*/
101
102/*
103 * Internal string casing functions implementing
104 * ustring.h/ustrcase.c and UnicodeString case mapping functions.
105 */
106
/*
 * State passed to the case mapping functions declared below.
 * The member order matters: UCASEMAP_INITIALIZER in this header initializes
 * the struct positionally, so adding, removing or reordering members
 * requires updating that macro as well.
 */
107struct UCaseMap {
    /* Case properties data (type declared in ucase.h). */
108    const UCaseProps *csp;
109#if !UCONFIG_NO_BREAK_ITERATION
    /* Member exists only when break iteration is compiled in. */
110    UBreakIterator *iter;  /* We adopt the iterator, so we own it. */
111#endif
    /* Locale ID storage, fixed 32-byte buffer. */
112    char locale[32];
    /* NOTE(review): presumably a cached value derived from locale -- confirm. */
113    int32_t locCache;
    /* Option bits; their meaning is not defined in this header. */
114    uint32_t options;
115};
116
/*
 * Provide the UCaseMap typedef unless __UCASEMAP_H__ is already defined.
 * NOTE(review): presumably ucasemap.h declares the same typedef under that
 * guard, and this avoids a duplicate -- confirm.
 */
117#ifndef __UCASEMAP_H__
118typedef struct UCaseMap UCaseMap;
119#endif
120
/*
 * Positional aggregate initializer for struct UCaseMap that zeroes every
 * member. Two variants are needed because the iter member exists only when
 * UCONFIG_NO_BREAK_ITERATION is 0; the extra NULL in the #else branch is
 * for iter. Must be kept in step with the member order of struct UCaseMap.
 */
121#if UCONFIG_NO_BREAK_ITERATION
122#   define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 }
123#else
124#   define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
125#endif
126
/**
 * Declared here, defined elsewhere.
 * NOTE(review): judging by the name and signature, this stores the given
 * locale into a temporary (caller-owned) UCaseMap -- confirm against the
 * implementation, including how a NULL locale is treated.
 *
 * @param csm    case map to modify
 * @param locale locale ID string
 */
127U_CFUNC void
128ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
129
/* The guard macro lets another header supply this typedef first. */
130#ifndef U_STRING_CASE_MAPPER_DEFINED
131#define U_STRING_CASE_MAPPER_DEFINED
132
133/**
134 * String case mapping function type, used by ustrcase_map().
135 * All error checking must be done.
136 * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
137 * src and dest must not overlap.
 * NOTE(review): "must be done" presumably means done by the caller before
 * a function of this type is invoked -- confirm.
 *
 * @param csm          case map settings
 * @param dest         output buffer
 * @param destCapacity capacity of dest, in UChars
 * @param src          input string
 * @param srcLength    length of src, in UChars
 * @param pErrorCode   ICU error code
 * @return int32_t; presumably the length of the mapped result -- TODO confirm
138 */
139typedef int32_t U_CALLCONV
140UStringCaseMapper(const UCaseMap *csm,
141                  UChar *dest, int32_t destCapacity,
142                  const UChar *src, int32_t srcLength,
143                  UErrorCode *pErrorCode);
144
145#endif
146
147/**
 * Implements UStringCaseMapper.
 * Parameters and return value follow the UStringCaseMapper function type;
 * by its name this is the lowercasing variant.
 */
148U_CFUNC int32_t U_CALLCONV
149ustrcase_internalToLower(const UCaseMap *csm,
150                         UChar *dest, int32_t destCapacity,
151                         const UChar *src, int32_t srcLength,
152                         UErrorCode *pErrorCode);
153
154/**
 * Implements UStringCaseMapper.
 * Parameters and return value follow the UStringCaseMapper function type;
 * by its name this is the uppercasing variant.
 */
155U_CFUNC int32_t U_CALLCONV
156ustrcase_internalToUpper(const UCaseMap *csm,
157                         UChar *dest, int32_t destCapacity,
158                         const UChar *src, int32_t srcLength,
159                         UErrorCode *pErrorCode);
160
/* Declared only when break iteration is compiled in. */
161#if !UCONFIG_NO_BREAK_ITERATION
162
163/**
 * Implements UStringCaseMapper.
 * Parameters and return value follow the UStringCaseMapper function type;
 * by its name this is the titlecasing variant.
 */
164U_CFUNC int32_t U_CALLCONV
165ustrcase_internalToTitle(const UCaseMap *csm,
166                         UChar *dest, int32_t destCapacity,
167                         const UChar *src, int32_t srcLength,
168                         UErrorCode *pErrorCode);
169
170#endif
171
172/**
 * Implements UStringCaseMapper.
 * Parameters and return value follow the UStringCaseMapper function type;
 * by its name this is the case folding variant.
 */
173U_CFUNC int32_t U_CALLCONV
174ustrcase_internalFold(const UCaseMap *csm,
175                      UChar *dest, int32_t destCapacity,
176                      const UChar *src, int32_t srcLength,
177                      UErrorCode *pErrorCode);
178
179/**
180 * Implements argument checking and buffer handling
181 * for string case mapping as a common function.
 *
 * @param csm              case map settings, handed to stringCaseMapper
 * @param dest             output buffer
 * @param destCapacity     capacity of dest, in UChars
 * @param src              input string
 * @param srcLength        length of src, in UChars
 * @param stringCaseMapper the mapping function to apply, e.g. one of the
 *                         ustrcase_internal*() functions declared above
 * @param pErrorCode       ICU error code
 * @return int32_t; presumably the length of the mapped result -- TODO confirm
182 */
183U_CFUNC int32_t
184ustrcase_map(const UCaseMap *csm,
185             UChar *dest, int32_t destCapacity,
186             const UChar *src, int32_t srcLength,
187             UStringCaseMapper *stringCaseMapper,
188             UErrorCode *pErrorCode);
189
190/**
191 * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
192 * UTF-8 version of UStringCaseMapper.
193 * All error checking must be done.
194 * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
195 * src and dest must not overlap.
 *
 * @param csm          case map settings
 * @param dest         output buffer of UTF-8 bytes
 * @param destCapacity capacity of dest, in bytes
 * @param src          input UTF-8 bytes
 * @param srcLength    length of src, in bytes
 * @param pErrorCode   ICU error code
 * @return int32_t; presumably the length of the mapped result -- TODO confirm
196 */
197typedef int32_t U_CALLCONV
198UTF8CaseMapper(const UCaseMap *csm,
199               uint8_t *dest, int32_t destCapacity,
200               const uint8_t *src, int32_t srcLength,
201               UErrorCode *pErrorCode);
202
203/**
 * Implements UTF8CaseMapper.
 * Parameters and return value follow the UTF8CaseMapper function type.
 * NOTE(review): unlike ustrcase_internalToTitle(), this declaration is not
 * wrapped in #if !UCONFIG_NO_BREAK_ITERATION in this header -- confirm that
 * the definition is available in that configuration.
 */
204U_CFUNC int32_t U_CALLCONV
205ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
206         uint8_t *dest, int32_t destCapacity,
207         const uint8_t *src, int32_t srcLength,
208         UErrorCode *pErrorCode);
209
210/**
211 * Implements argument checking and buffer handling
212 * for UTF-8 string case mapping as a common function.
 *
 * @param csm              case map settings, handed to stringCaseMapper
 * @param dest             output buffer of UTF-8 bytes
 * @param destCapacity     capacity of dest, in bytes
 * @param src              input UTF-8 bytes
 * @param srcLength        length of src, in bytes
 * @param stringCaseMapper the UTF-8 mapping function to apply
 * @param pErrorCode       ICU error code
 * @return int32_t; presumably the length of the mapped result -- TODO confirm
213 */
214U_CFUNC int32_t
215ucasemap_mapUTF8(const UCaseMap *csm,
216                 uint8_t *dest, int32_t destCapacity,
217                 const uint8_t *src, int32_t srcLength,
218                 UTF8CaseMapper *stringCaseMapper,
219                 UErrorCode *pErrorCode);
220
/**
 * Hash function over a UChar string with an explicit length.
 * The algorithm is defined in the implementation, not in this header.
 *
 * @param str    string to hash
 * @param length number of UChars at str to include
 * @return hash code as int32_t
 */
221U_CAPI int32_t U_EXPORT2
222ustr_hashUCharsN(const UChar *str, int32_t length);
223
/**
 * Hash function over a char string with an explicit length.
 * The algorithm is defined in the implementation, not in this header.
 *
 * @param str    string to hash
 * @param length number of chars at str to include
 * @return hash code as int32_t
 */
224U_CAPI int32_t U_EXPORT2
225ustr_hashCharsN(const char *str, int32_t length);
226
/**
 * Hash function over a char string with an explicit length.
 * NOTE(review): the "I" in the name presumably marks a case-insensitive
 * variant of ustr_hashCharsN() -- confirm against the implementation.
 *
 * @param str    string to hash
 * @param length number of chars at str to include
 * @return hash code as int32_t
 */
227U_CAPI int32_t U_EXPORT2
228ustr_hashICharsN(const char *str, int32_t length);
229
230/**
231 * NUL-terminate a UChar * string if possible.
232 * If length  < destCapacity then NUL-terminate.
233 * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING.
234 * If length  > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR.
235 *
236 * @param dest Destination buffer, can be NULL if destCapacity==0.
237 * @param destCapacity Number of UChars available at dest.
238 * @param length Number of UChars that were (to be) written to dest.
239 * @param pErrorCode ICU error code; receives the warning or error named
 *                   in the length cases above.
240 * @return length (the value passed in, so callers can return it directly)
241 */
242U_CAPI int32_t U_EXPORT2
243u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
244
245/**
246 * NUL-terminate a char * string if possible.
247 * Same as u_terminateUChars() but for a different string type.
 * destCapacity and length are counted in chars here.
248 */
249U_CAPI int32_t U_EXPORT2
250u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
251
252/**
253 * NUL-terminate a UChar32 * string if possible.
254 * Same as u_terminateUChars() but for a different string type.
 * destCapacity and length are counted in UChar32 units here.
255 */
256U_CAPI int32_t U_EXPORT2
257u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
258
259/**
260 * NUL-terminate a wchar_t * string if possible.
261 * Same as u_terminateUChars() but for a different string type.
 * destCapacity and length are counted in wchar_t units here.
262 */
263U_CAPI int32_t U_EXPORT2
264u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);
265
266#endif
267