// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// casemap.h
// created: 2017jan12 Markus W. Scherer
6
7#ifndef __CASEMAP_H__
8#define __CASEMAP_H__
9
10#include "unicode/utypes.h"
11#include "unicode/stringpiece.h"
12#include "unicode/uobject.h"
13
/**
 * \file
 * \brief C++ API: Low-level C++ case mapping functions.
 */
18
19U_NAMESPACE_BEGIN
20
21#ifndef U_HIDE_DRAFT_API
22
// Forward declarations: the CaseMap signatures below use these types only
// through pointers or references, so their full definitions are not needed here.
class BreakIterator;  // optional break iterator parameter of the titlecasing functions
class ByteSink;       // output sink of the StringPiece-based UTF-8 functions
class Edits;          // optional record of the edits made by a mapping function
26
/**
 * Low-level C++ case mapping functions.
 *
 * @draft ICU 59
 */
32class U_COMMON_API CaseMap U_FINAL : public UMemory {
33public:
34    /**
35     * Lowercases a UTF-16 string and optionally records edits.
36     * Casing is locale-dependent and context-sensitive.
37     * The result may be longer or shorter than the original.
38     * The source string and the destination buffer must not overlap.
39     *
40     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
41     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
42     * @param src       The original string.
43     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
44     * @param dest      A buffer for the result string. The result will be NUL-terminated if
45     *                  the buffer is large enough.
46     *                  The contents is undefined in case of failure.
47     * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
48     *                  dest may be NULL and the function will only return the length of the result
49     *                  without writing any of the result string.
50     * @param edits     Records edits for index mapping, working with styled text,
51     *                  and getting only changes (if any).
52     *                  The Edits contents is undefined if any error occurs.
53     *                  This function calls edits->reset() first unless
54     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
55     * @param errorCode Reference to an in/out error code value
56     *                  which must not indicate a failure before the function call.
57     * @return The length of the result string, if successful.
58     *         When the result would be longer than destCapacity,
59     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
60     *
61     * @see u_strToLower
62     * @draft ICU 59
63     */
64     static int32_t toLower(
65            const char *locale, uint32_t options,
66            const char16_t *src, int32_t srcLength,
67            char16_t *dest, int32_t destCapacity, Edits *edits,
68            UErrorCode &errorCode);
69
70    /**
71     * Uppercases a UTF-16 string and optionally records edits.
72     * Casing is locale-dependent and context-sensitive.
73     * The result may be longer or shorter than the original.
74     * The source string and the destination buffer must not overlap.
75     *
76     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
77     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
78     * @param src       The original string.
79     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
80     * @param dest      A buffer for the result string. The result will be NUL-terminated if
81     *                  the buffer is large enough.
82     *                  The contents is undefined in case of failure.
83     * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
84     *                  dest may be NULL and the function will only return the length of the result
85     *                  without writing any of the result string.
86     * @param edits     Records edits for index mapping, working with styled text,
87     *                  and getting only changes (if any).
88     *                  The Edits contents is undefined if any error occurs.
89     *                  This function calls edits->reset() first unless
90     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
91     * @param errorCode Reference to an in/out error code value
92     *                  which must not indicate a failure before the function call.
93     * @return The length of the result string, if successful.
94     *         When the result would be longer than destCapacity,
95     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
96     *
97     * @see u_strToUpper
98     * @draft ICU 59
99     */
100    static int32_t toUpper(
101            const char *locale, uint32_t options,
102            const char16_t *src, int32_t srcLength,
103            char16_t *dest, int32_t destCapacity, Edits *edits,
104            UErrorCode &errorCode);
105
106#if !UCONFIG_NO_BREAK_ITERATION
107
108    /**
109     * Titlecases a UTF-16 string and optionally records edits.
110     * Casing is locale-dependent and context-sensitive.
111     * The result may be longer or shorter than the original.
112     * The source string and the destination buffer must not overlap.
113     *
114     * Titlecasing uses a break iterator to find the first characters of words
115     * that are to be titlecased. It titlecases those characters and lowercases
116     * all others. (This can be modified with options bits.)
117     *
118     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
119     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
120     *                  U_TITLECASE_NO_LOWERCASE,
121     *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
122     *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
123     * @param iter      A break iterator to find the first characters of words that are to be titlecased.
124     *                  It is set to the source string (setText())
125     *                  and used one or more times for iteration (first() and next()).
126     *                  If NULL, then a word break iterator for the locale is used
127     *                  (or something equivalent).
128     * @param src       The original string.
129     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
130     * @param dest      A buffer for the result string. The result will be NUL-terminated if
131     *                  the buffer is large enough.
132     *                  The contents is undefined in case of failure.
133     * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
134     *                  dest may be NULL and the function will only return the length of the result
135     *                  without writing any of the result string.
136     * @param edits     Records edits for index mapping, working with styled text,
137     *                  and getting only changes (if any).
138     *                  The Edits contents is undefined if any error occurs.
139     *                  This function calls edits->reset() first unless
140     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
141     * @param errorCode Reference to an in/out error code value
142     *                  which must not indicate a failure before the function call.
143     * @return The length of the result string, if successful.
144     *         When the result would be longer than destCapacity,
145     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
146     *
147     * @see u_strToTitle
148     * @see ucasemap_toTitle
149     * @draft ICU 59
150     */
151    static int32_t toTitle(
152            const char *locale, uint32_t options, BreakIterator *iter,
153            const char16_t *src, int32_t srcLength,
154            char16_t *dest, int32_t destCapacity, Edits *edits,
155            UErrorCode &errorCode);
156
157#endif  // UCONFIG_NO_BREAK_ITERATION
158
159    /**
160     * Case-folds a UTF-16 string and optionally records edits.
161     *
162     * Case folding is locale-independent and not context-sensitive,
163     * but there is an option for whether to include or exclude mappings for dotted I
164     * and dotless i that are marked with 'T' in CaseFolding.txt.
165     *
166     * The result may be longer or shorter than the original.
167     * The source string and the destination buffer must not overlap.
168     *
169     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
170     *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
171     * @param src       The original string.
172     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
173     * @param dest      A buffer for the result string. The result will be NUL-terminated if
174     *                  the buffer is large enough.
175     *                  The contents is undefined in case of failure.
176     * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
177     *                  dest may be NULL and the function will only return the length of the result
178     *                  without writing any of the result string.
179     * @param edits     Records edits for index mapping, working with styled text,
180     *                  and getting only changes (if any).
181     *                  The Edits contents is undefined if any error occurs.
182     *                  This function calls edits->reset() first unless
183     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
184     * @param errorCode Reference to an in/out error code value
185     *                  which must not indicate a failure before the function call.
186     * @return The length of the result string, if successful.
187     *         When the result would be longer than destCapacity,
188     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
189     *
190     * @see u_strFoldCase
191     * @draft ICU 59
192     */
193    static int32_t fold(
194            uint32_t options,
195            const char16_t *src, int32_t srcLength,
196            char16_t *dest, int32_t destCapacity, Edits *edits,
197            UErrorCode &errorCode);
198
199    /**
200     * Lowercases a UTF-8 string and optionally records edits.
201     * Casing is locale-dependent and context-sensitive.
202     * The result may be longer or shorter than the original.
203     *
204     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
205     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
206     * @param src       The original string.
207     * @param sink      A ByteSink to which the result string is written.
208     *                  sink.Flush() is called at the end.
209     * @param edits     Records edits for index mapping, working with styled text,
210     *                  and getting only changes (if any).
211     *                  The Edits contents is undefined if any error occurs.
212     *                  This function calls edits->reset() first unless
213     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
214     * @param errorCode Reference to an in/out error code value
215     *                  which must not indicate a failure before the function call.
216     *
217     * @see ucasemap_utf8ToLower
218     * @draft ICU 60
219     */
220    static void utf8ToLower(
221            const char *locale, uint32_t options,
222            StringPiece src, ByteSink &sink, Edits *edits,
223            UErrorCode &errorCode);
224
225    /**
226     * Uppercases a UTF-8 string and optionally records edits.
227     * Casing is locale-dependent and context-sensitive.
228     * The result may be longer or shorter than the original.
229     *
230     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
231     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
232     * @param src       The original string.
233     * @param sink      A ByteSink to which the result string is written.
234     *                  sink.Flush() is called at the end.
235     * @param edits     Records edits for index mapping, working with styled text,
236     *                  and getting only changes (if any).
237     *                  The Edits contents is undefined if any error occurs.
238     *                  This function calls edits->reset() first unless
239     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
240     * @param errorCode Reference to an in/out error code value
241     *                  which must not indicate a failure before the function call.
242     *
243     * @see ucasemap_utf8ToUpper
244     * @draft ICU 60
245     */
246    static void utf8ToUpper(
247            const char *locale, uint32_t options,
248            StringPiece src, ByteSink &sink, Edits *edits,
249            UErrorCode &errorCode);
250
251#if !UCONFIG_NO_BREAK_ITERATION
252
253    /**
254     * Titlecases a UTF-8 string and optionally records edits.
255     * Casing is locale-dependent and context-sensitive.
256     * The result may be longer or shorter than the original.
257     *
258     * Titlecasing uses a break iterator to find the first characters of words
259     * that are to be titlecased. It titlecases those characters and lowercases
260     * all others. (This can be modified with options bits.)
261     *
262     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
263     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
264     *                  U_TITLECASE_NO_LOWERCASE,
265     *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
266     *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
267     * @param iter      A break iterator to find the first characters of words that are to be titlecased.
268     *                  It is set to the source string (setUText())
269     *                  and used one or more times for iteration (first() and next()).
270     *                  If NULL, then a word break iterator for the locale is used
271     *                  (or something equivalent).
272     * @param src       The original string.
273     * @param sink      A ByteSink to which the result string is written.
274     *                  sink.Flush() is called at the end.
275     * @param edits     Records edits for index mapping, working with styled text,
276     *                  and getting only changes (if any).
277     *                  The Edits contents is undefined if any error occurs.
278     *                  This function calls edits->reset() first unless
279     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
280     * @param errorCode Reference to an in/out error code value
281     *                  which must not indicate a failure before the function call.
282     *
283     * @see ucasemap_utf8ToTitle
284     * @draft ICU 60
285     */
286    static void utf8ToTitle(
287            const char *locale, uint32_t options, BreakIterator *iter,
288            StringPiece src, ByteSink &sink, Edits *edits,
289            UErrorCode &errorCode);
290
291#endif  // UCONFIG_NO_BREAK_ITERATION
292
293    /**
294     * Case-folds a UTF-8 string and optionally records edits.
295     *
296     * Case folding is locale-independent and not context-sensitive,
297     * but there is an option for whether to include or exclude mappings for dotted I
298     * and dotless i that are marked with 'T' in CaseFolding.txt.
299     *
300     * The result may be longer or shorter than the original.
301     *
302     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
303     * @param src       The original string.
304     * @param sink      A ByteSink to which the result string is written.
305     *                  sink.Flush() is called at the end.
306     * @param edits     Records edits for index mapping, working with styled text,
307     *                  and getting only changes (if any).
308     *                  The Edits contents is undefined if any error occurs.
309     *                  This function calls edits->reset() first unless
310     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
311     * @param errorCode Reference to an in/out error code value
312     *                  which must not indicate a failure before the function call.
313     *
314     * @see ucasemap_utf8FoldCase
315     * @draft ICU 60
316     */
317    static void utf8Fold(
318            uint32_t options,
319            StringPiece src, ByteSink &sink, Edits *edits,
320            UErrorCode &errorCode);
321
322    /**
323     * Lowercases a UTF-8 string and optionally records edits.
324     * Casing is locale-dependent and context-sensitive.
325     * The result may be longer or shorter than the original.
326     * The source string and the destination buffer must not overlap.
327     *
328     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
329     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
330     * @param src       The original string.
331     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
332     * @param dest      A buffer for the result string. The result will be NUL-terminated if
333     *                  the buffer is large enough.
334     *                  The contents is undefined in case of failure.
335     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
336     *                  dest may be NULL and the function will only return the length of the result
337     *                  without writing any of the result string.
338     * @param edits     Records edits for index mapping, working with styled text,
339     *                  and getting only changes (if any).
340     *                  The Edits contents is undefined if any error occurs.
341     *                  This function calls edits->reset() first unless
342     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
343     * @param errorCode Reference to an in/out error code value
344     *                  which must not indicate a failure before the function call.
345     * @return The length of the result string, if successful.
346     *         When the result would be longer than destCapacity,
347     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
348     *
349     * @see ucasemap_utf8ToLower
350     * @draft ICU 59
351     */
352    static int32_t utf8ToLower(
353            const char *locale, uint32_t options,
354            const char *src, int32_t srcLength,
355            char *dest, int32_t destCapacity, Edits *edits,
356            UErrorCode &errorCode);
357
358    /**
359     * Uppercases a UTF-8 string and optionally records edits.
360     * Casing is locale-dependent and context-sensitive.
361     * The result may be longer or shorter than the original.
362     * The source string and the destination buffer must not overlap.
363     *
364     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
365     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
366     * @param src       The original string.
367     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
368     * @param dest      A buffer for the result string. The result will be NUL-terminated if
369     *                  the buffer is large enough.
370     *                  The contents is undefined in case of failure.
371     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
372     *                  dest may be NULL and the function will only return the length of the result
373     *                  without writing any of the result string.
374     * @param edits     Records edits for index mapping, working with styled text,
375     *                  and getting only changes (if any).
376     *                  The Edits contents is undefined if any error occurs.
377     *                  This function calls edits->reset() first unless
378     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
379     * @param errorCode Reference to an in/out error code value
380     *                  which must not indicate a failure before the function call.
381     * @return The length of the result string, if successful.
382     *         When the result would be longer than destCapacity,
383     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
384     *
385     * @see ucasemap_utf8ToUpper
386     * @draft ICU 59
387     */
388    static int32_t utf8ToUpper(
389            const char *locale, uint32_t options,
390            const char *src, int32_t srcLength,
391            char *dest, int32_t destCapacity, Edits *edits,
392            UErrorCode &errorCode);
393
394#if !UCONFIG_NO_BREAK_ITERATION
395
396    /**
397     * Titlecases a UTF-8 string and optionally records edits.
398     * Casing is locale-dependent and context-sensitive.
399     * The result may be longer or shorter than the original.
400     * The source string and the destination buffer must not overlap.
401     *
402     * Titlecasing uses a break iterator to find the first characters of words
403     * that are to be titlecased. It titlecases those characters and lowercases
404     * all others. (This can be modified with options bits.)
405     *
406     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
407     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
408     *                  U_TITLECASE_NO_LOWERCASE,
409     *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
410     *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
411     * @param iter      A break iterator to find the first characters of words that are to be titlecased.
412     *                  It is set to the source string (setUText())
413     *                  and used one or more times for iteration (first() and next()).
414     *                  If NULL, then a word break iterator for the locale is used
415     *                  (or something equivalent).
416     * @param src       The original string.
417     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
418     * @param dest      A buffer for the result string. The result will be NUL-terminated if
419     *                  the buffer is large enough.
420     *                  The contents is undefined in case of failure.
421     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
422     *                  dest may be NULL and the function will only return the length of the result
423     *                  without writing any of the result string.
424     * @param edits     Records edits for index mapping, working with styled text,
425     *                  and getting only changes (if any).
426     *                  The Edits contents is undefined if any error occurs.
427     *                  This function calls edits->reset() first unless
428     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
429     * @param errorCode Reference to an in/out error code value
430     *                  which must not indicate a failure before the function call.
431     * @return The length of the result string, if successful.
432     *         When the result would be longer than destCapacity,
433     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
434     *
435     * @see ucasemap_utf8ToTitle
436     * @draft ICU 59
437     */
438    static int32_t utf8ToTitle(
439            const char *locale, uint32_t options, BreakIterator *iter,
440            const char *src, int32_t srcLength,
441            char *dest, int32_t destCapacity, Edits *edits,
442            UErrorCode &errorCode);
443
444#endif  // UCONFIG_NO_BREAK_ITERATION
445
446    /**
447     * Case-folds a UTF-8 string and optionally records edits.
448     *
449     * Case folding is locale-independent and not context-sensitive,
450     * but there is an option for whether to include or exclude mappings for dotted I
451     * and dotless i that are marked with 'T' in CaseFolding.txt.
452     *
453     * The result may be longer or shorter than the original.
454     * The source string and the destination buffer must not overlap.
455     *
456     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
457     *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
458     * @param src       The original string.
459     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
460     * @param dest      A buffer for the result string. The result will be NUL-terminated if
461     *                  the buffer is large enough.
462     *                  The contents is undefined in case of failure.
463     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
464     *                  dest may be NULL and the function will only return the length of the result
465     *                  without writing any of the result string.
466     * @param edits     Records edits for index mapping, working with styled text,
467     *                  and getting only changes (if any).
468     *                  The Edits contents is undefined if any error occurs.
469     *                  This function calls edits->reset() first unless
470     *                  options includes U_EDITS_NO_RESET. edits can be NULL.
471     * @param errorCode Reference to an in/out error code value
472     *                  which must not indicate a failure before the function call.
473     * @return The length of the result string, if successful.
474     *         When the result would be longer than destCapacity,
475     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
476     *
477     * @see ucasemap_utf8FoldCase
478     * @draft ICU 59
479     */
480    static int32_t utf8Fold(
481            uint32_t options,
482            const char *src, int32_t srcLength,
483            char *dest, int32_t destCapacity, Edits *edits,
484            UErrorCode &errorCode);
485
486private:
487    CaseMap() = delete;
488    CaseMap(const CaseMap &other) = delete;
489    CaseMap &operator=(const CaseMap &other) = delete;
490};
491
492#endif  // U_HIDE_DRAFT_API
493
494U_NAMESPACE_END
495
496#endif  // __CASEMAP_H__
497