/*
*******************************************************************************
*
*   Copyright (C) 2000-2002, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ustring.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000aug15
*   created by: Markus W. Scherer
*
*   This file contains sample code that illustrates the use of Unicode strings
*   with ICU.
*/
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
#include <stdio.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/locid.h"
#include "unicode/ustring.h"
#include "unicode/ucnv.h"
#include "unicode/unistr.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Number of elements in a true array; not valid for a pointer or an array parameter. */
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// helper functions -------------------------------------------------------- ***
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// default converter for the platform encoding
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UConverter *cnv=NULL;
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruprintUString(const char *announce, const UChar *s, int32_t length) {
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static char out[200];
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i;
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Convert to the "platform encoding". See notes in printUnicodeString().
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * ucnv_fromUChars(), like most ICU APIs understands length==-1
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * to mean that the string is NUL-terminated.
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_fromUChars(cnv, out, sizeof(out), s, length, &errorCode);
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode) || errorCode==U_STRING_NOT_TERMINATED_WARNING) {
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("%sproblem converting string from Unicode: %s\n", announce, u_errorName(errorCode));
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("%s%s {", announce, out);
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* output the code points (not code units) */
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length>=0) {
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* s is not NUL-terminated */
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(i=0; i<length; /* U16_NEXT post-increments */) {
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            U16_NEXT(s, i, length, c);
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            printf(" %04x", c);
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* s is NUL-terminated */
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(i=0; /* condition in loop body */; /* U16_NEXT post-increments */) {
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            U16_NEXT(s, i, length, c);
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c==0) {
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            printf(" %04x", c);
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf(" }\n");
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruprintUnicodeString(const char *announce, const UnicodeString &s) {
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static char out[200];
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, length;
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // output the string, converted to the platform encoding
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Note for Windows: The "platform encoding" defaults to the "ANSI codepage",
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // which is different from the "OEM codepage" in the console window.
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // However, if you pipe the output into a file and look at it with Notepad
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // or similar, then "ANSI" characters will show correctly.
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Production code should be aware of what encoding is required,
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // and use a UConverter or at least a charset name explicitly.
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    out[s.extract(0, 99, out)]=0;
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("%s%s {", announce, out);
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // output the code units (not code points)
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=s.length();
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<length; ++i) {
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf(" %04x", s.charAt(i));
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf(" }\n");
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// sample code for utf.h macros -------------------------------------------- ***
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querudemo_utf_h_macros() {
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static UChar input[]={ 0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062 };
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i;
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isError;
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("\n* demo_utf_h_macros() -------------- ***\n\n");
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("iterate forward through: ", input, LENGTHOF(input));
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<LENGTHOF(input); /* U16_NEXT post-increments */) {
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* Iterating forwards
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           Codepoint at offset 0: U+0061
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           Codepoint at offset 1: U+10000
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           Codepoint at offset 3: U+10ffff
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           Codepoint at offset 5: U+0062
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        */
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("Codepoint at offset %d: U+", i);
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_NEXT(input, i, LENGTHOF(input), c);
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("%04x\n", c);
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("");
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isError=FALSE;
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    i=1; /* write position, gets post-incremented so needs to be in an l-value */
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U16_APPEND(input, i, LENGTHOF(input), 0x0062, isError);
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("iterate backward through: ", input, LENGTHOF(input));
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=LENGTHOF(input); i>0; /* U16_PREV pre-decrements */) {
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_PREV(input, 0, i, c);
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* Iterating backwards
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           Codepoint at offset 5: U+0062
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           Codepoint at offset 3: U+10ffff
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           Codepoint at offset 2: U+dc00 -- unpaired surrogate because lead surr. overwritten
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           Codepoint at offset 1: U+0062 -- by this BMP code point
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           Codepoint at offset 0: U+0061
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        */
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("Codepoint at offset %d: U+%04x\n", i, c);
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// sample code for Unicode strings in C ------------------------------------ ***
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void demo_C_Unicode_strings() {
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("\n* demo_C_Unicode_strings() --------- ***\n\n");
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const UChar text[]={ 0x41, 0x42, 0x43, 0 };          /* "ABC" */
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const UChar appendText[]={ 0x61, 0x62, 0x63, 0 };    /* "abc" */
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const UChar cmpText[]={ 0x61, 0x53, 0x73, 0x43, 0 }; /* "aSsC" */
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar buffer[32];
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t compare;
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t length=u_strlen(text); /* length=3 */
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* simple ANSI C-style functions */
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    buffer[0]=0;                    /* empty, NUL-terminated string */
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    u_strncat(buffer, text, 1);     /* append just n=1 character ('A') */
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    u_strcat(buffer, appendText);   /* buffer=="Aabc" */
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=u_strlen(buffer);        /* length=4 */
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("should be \"Aabc\": ", buffer, -1);
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* bitwise comparing buffer with text */
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    compare=u_strcmp(buffer, text);
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(compare<=0) {
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("String comparison error, expected \"Aabc\" > \"ABC\"\n");
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* Build "A<sharp s>C" in the buffer... */
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    u_strcpy(buffer, text);
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    buffer[1]=0xdf; /* sharp s, case-compares equal to "ss" */
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("should be \"A<sharp s>C\": ", buffer, -1);
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* Compare two strings case-insensitively using full case folding */
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    compare=u_strcasecmp(buffer, cmpText, U_FOLD_CASE_DEFAULT);
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(compare!=0) {
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("String case insensitive comparison error, expected \"AbC\" to be equal to \"ABC\"\n");
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// sample code for case mappings with C APIs -------------------------------- ***
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void demoCaseMapInC() {
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * input=
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   "aB<capital sigma>"
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   "iI<small dotless i><capital dotted I> "
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   "<sharp s> <small lig. ffi>"
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   "<small final sigma><small sigma><capital sigma>"
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const UChar input[]={
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        0x61, 0x42, 0x3a3,
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        0x69, 0x49, 0x131, 0x130, 0x20,
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        0xdf, 0x20, 0xfb03,
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        0x3c2, 0x3c3, 0x3a3, 0
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    };
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar buffer[32];
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode;
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, j, length;
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isError;
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("\n* demoCaseMapInC() ----------------- ***\n\n");
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * First, use simple case mapping functions which provide
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * 1:1 code point mappings without context/locale ID.
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Note that some mappings will not be "right" because some "real"
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * case mappings require context, depend on the locale ID,
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * and/or result in a change in the number of code points.
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("input string: ", input, -1);
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* uppercase */
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isError=FALSE;
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=j=0; j<LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c==0) {
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* stop at terminating NUL, no need to terminate buffer */
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=u_toupper(c);
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_APPEND(buffer, j, LENGTHOF(buffer), c, isError);
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("simple-uppercased: ", buffer, j);
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* lowercase */
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isError=FALSE;
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=j=0; j<LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c==0) {
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* stop at terminating NUL, no need to terminate buffer */
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=u_tolower(c);
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_APPEND(buffer, j, LENGTHOF(buffer), c, isError);
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("simple-lowercased: ", buffer, j);
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* titlecase */
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isError=FALSE;
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=j=0; j<LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c==0) {
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* stop at terminating NUL, no need to terminate buffer */
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=u_totitle(c);
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_APPEND(buffer, j, LENGTHOF(buffer), c, isError);
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("simple-titlecased: ", buffer, j);
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* case-fold/default */
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isError=FALSE;
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=j=0; j<LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c==0) {
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* stop at terminating NUL, no need to terminate buffer */
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=u_foldCase(c, U_FOLD_CASE_DEFAULT);
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_APPEND(buffer, j, LENGTHOF(buffer), c, isError);
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("simple-case-folded/default: ", buffer, j);
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* case-fold/Turkic */
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isError=FALSE;
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=j=0; j<LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c==0) {
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* stop at terminating NUL, no need to terminate buffer */
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_APPEND(buffer, j, LENGTHOF(buffer), c, isError);
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("simple-case-folded/Turkic: ", buffer, j);
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Second, use full case mapping functions which provide
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * 1:n code point mappings (n can be 0!) and are sensitive to context and locale ID.
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Note that lower/upper/titlecasing take a locale ID while case-folding
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * has bit flag options instead, by design of the Unicode SpecialCasing.txt UCD file.
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Also, string titlecasing requires a BreakIterator to find starts of words.
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The sample code here passes in a NULL pointer; u_strToTitle() will open and close a default
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * titlecasing BreakIterator automatically.
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * For production code where many strings are titlecased it would be more efficient
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * to open a BreakIterator externally and pass it in.
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUString("\ninput string: ", input, -1);
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* lowercase/English */
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=u_strToLower(buffer, LENGTHOF(buffer), input, -1, "en", &errorCode);
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printUString("full-lowercased/en: ", buffer, length);
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("error in u_strToLower(en)=%ld error=%s\n", length, u_errorName(errorCode));
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* lowercase/Turkish */
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=u_strToLower(buffer, LENGTHOF(buffer), input, -1, "tr", &errorCode);
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printUString("full-lowercased/tr: ", buffer, length);
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("error in u_strToLower(tr)=%ld error=%s\n", length, u_errorName(errorCode));
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* uppercase/English */
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=u_strToUpper(buffer, LENGTHOF(buffer), input, -1, "en", &errorCode);
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printUString("full-uppercased/en: ", buffer, length);
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("error in u_strToUpper(en)=%ld error=%s\n", length, u_errorName(errorCode));
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* uppercase/Turkish */
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=u_strToUpper(buffer, LENGTHOF(buffer), input, -1, "tr", &errorCode);
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printUString("full-uppercased/tr: ", buffer, length);
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("error in u_strToUpper(tr)=%ld error=%s\n", length, u_errorName(errorCode));
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* titlecase/English */
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=u_strToTitle(buffer, LENGTHOF(buffer), input, -1, NULL, "en", &errorCode);
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printUString("full-titlecased/en: ", buffer, length);
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("error in u_strToTitle(en)=%ld error=%s\n", length, u_errorName(errorCode));
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* titlecase/Turkish */
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=u_strToTitle(buffer, LENGTHOF(buffer), input, -1, NULL, "tr", &errorCode);
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printUString("full-titlecased/tr: ", buffer, length);
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("error in u_strToTitle(tr)=%ld error=%s\n", length, u_errorName(errorCode));
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* case-fold/default */
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=u_strFoldCase(buffer, LENGTHOF(buffer), input, -1, U_FOLD_CASE_DEFAULT, &errorCode);
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printUString("full-case-folded/default: ", buffer, length);
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("error in u_strFoldCase(default)=%ld error=%s\n", length, u_errorName(errorCode));
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* case-fold/Turkic */
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=u_strFoldCase(buffer, LENGTHOF(buffer), input, -1, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printUString("full-case-folded/Turkic: ", buffer, length);
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("error in u_strFoldCase(Turkic)=%ld error=%s\n", length, u_errorName(errorCode));
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// sample code for case mappings with C++ APIs ------------------------------ ***
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void demoCaseMapInCPlusPlus() {
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * input=
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   "aB<capital sigma>"
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   "iI<small dotless i><capital dotted I> "
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   "<sharp s> <small lig. ffi>"
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   "<small final sigma><small sigma><capital sigma>"
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const UChar input[]={
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        0x61, 0x42, 0x3a3,
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        0x69, 0x49, 0x131, 0x130, 0x20,
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        0xdf, 0x20, 0xfb03,
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        0x3c2, 0x3c3, 0x3a3, 0
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    };
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("\n* demoCaseMapInCPlusPlus() --------- ***\n\n");
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString s(input), t;
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const Locale &en=Locale::getEnglish();
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Locale tr("tr");
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Full case mappings as in demoCaseMapInC(), using UnicodeString functions.
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * These functions modify the string object itself.
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Since we want to keep the input string around, we copy it each time
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * and case-map the copy.
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("input string: ", s);
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* lowercase/English */
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("full-lowercased/en: ", (t=s).toLower(en));
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* lowercase/Turkish */
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("full-lowercased/tr: ", (t=s).toLower(tr));
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* uppercase/English */
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("full-uppercased/en: ", (t=s).toUpper(en));
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* uppercase/Turkish */
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("full-uppercased/tr: ", (t=s).toUpper(tr));
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* titlecase/English */
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("full-titlecased/en: ", (t=s).toTitle(NULL, en));
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* titlecase/Turkish */
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("full-titlecased/tr: ", (t=s).toTitle(NULL, tr));
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* case-folde/default */
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("full-case-folded/default: ", (t=s).foldCase(U_FOLD_CASE_DEFAULT));
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* case-folde/Turkic */
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("full-case-folded/Turkic: ", (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I));
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// sample code for UnicodeString storage models ----------------------------- ***
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// Backing arrays for demoUnicodeStringStorage().
// Neither UChar array ends with a NUL; the demo passes explicit lengths
// obtained with LENGTHOF().

// constant array: the demo copies it with setTo() and aliases it read-only
static const UChar readonly[]={
    0x61, 0x31, 0x20ac
};
// modifiable array: the demo aliases it both read-only and writeable
static UChar writeable[]={
    0x62, 0x32, 0xdbc0, 0xdc01 // includes a surrogate pair for a supplementary code point
};
// NOTE(review): not referenced in the visible part of this file -
// confirm whether it is still used before removing it
static char out[100];
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerudemoUnicodeStringStorage() {
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // These sample code lines illustrate how to use UnicodeString, and the
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // comments tell what happens internally. There are no APIs to observe
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // most of this programmatically, except for stepping into the code
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // with a debugger.
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // This is by design to hide such details from the user.
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i;
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("\n* demoUnicodeStringStorage() ------- ***\n\n");
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // * UnicodeString with internally stored contents
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // instantiate a UnicodeString from a single code point
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // the few (2) UChars will be stored in the object itself
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString one((UChar32)0x24001);
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // this copies the few UChars into the "two" object
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString two=one;
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("length of short string copy: %d\n", two.length());
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // set "one" to contain the 3 UChars from readonly
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // this setTo() variant copies the characters
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    one.setTo(readonly, LENGTHOF(readonly));
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // * UnicodeString with allocated contents
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // build a longer string that will not fit into the object's buffer
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    one+=UnicodeString(writeable, LENGTHOF(writeable));
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    one+=one;
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    one+=one;
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("length of longer string: %d\n", one.length());
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // copying will use the same allocated buffer and increment the reference
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // counter
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    two=one;
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("length of longer string copy: %d\n", two.length());
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // * UnicodeString using readonly-alias to a const UChar array
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // construct a string that aliases a readonly buffer
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString three(FALSE, readonly, LENGTHOF(readonly));
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("readonly-alias string: ", three);
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // copy-on-write: any modification to the string results in
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // a copy to either the internal buffer or to a newly allocated one
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    three.setCharAt(1, 0x39);
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("readonly-aliasing string after modification: ", three);
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // the aliased array is not modified
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<three.length(); ++i) {
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("readonly buffer[%d] after modifying its string: 0x%lx\n",
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               i, readonly[i]);
456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // setTo() readonly alias
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    one.setTo(FALSE, writeable, LENGTHOF(writeable));
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // copying the readonly-alias object with fastCopyFrom() (new in ICU 2.4)
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // will readonly-alias the same buffer
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    two.fastCopyFrom(one);
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("fastCopyFrom(readonly alias of \"writeable\" array): ", two);
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("verify that a fastCopyFrom(readonly alias) uses the same buffer pointer: %d (should be 1)\n",
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        one.getBuffer()==two.getBuffer());
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // a normal assignment will clone the contents (new in ICU 2.4)
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    two=one;
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("verify that a regular copy of a readonly alias uses a different buffer pointer: %d (should be 0)\n",
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        one.getBuffer()==two.getBuffer());
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // * UnicodeString using writeable-alias to a non-const UChar array
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString four(writeable, LENGTHOF(writeable), LENGTHOF(writeable));
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("writeable-alias string: ", four);
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // a modification writes through to the buffer
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    four.setCharAt(1, 0x39);
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<four.length(); ++i) {
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("writeable-alias backing buffer[%d]=0x%lx "
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               "after modification\n", i, writeable[i]);
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // a copy will not alias any more;
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // instead, it will get a copy of the contents into allocated memory
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    two=four;
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    two.setCharAt(1, 0x21);
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<two.length(); ++i) {
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("writeable-alias backing buffer[%d]=0x%lx after "
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               "modification of string copy\n", i, writeable[i]);
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // setTo() writeable alias, capacity==length
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    one.setTo(writeable, LENGTHOF(writeable), LENGTHOF(writeable));
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // grow the string - it will not fit into the backing buffer any more
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // and will get copied before modification
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    one.append((UChar)0x40);
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // shrink it back so it would fit
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    one.truncate(one.length()-1);
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // we still operate on the copy
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    one.setCharAt(1, 0x25);
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("string after growing too much and then shrinking[1]=0x%lx\n"
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           "                          backing store for this[1]=0x%lx\n",
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           one.charAt(1), writeable[1]);
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // if we need it in the original buffer, then extract() to it
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // extract() does not do anything if the string aliases that same buffer
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // i=min(one.length(), length of array)
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(one.length()<LENGTHOF(writeable)) {
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=one.length();
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=LENGTHOF(writeable);
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    one.extract(0, i, writeable);
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<LENGTHOF(writeable); ++i) {
509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("writeable-alias backing buffer[%d]=0x%lx after re-extract\n",
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               i, writeable[i]);
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// sample code for UnicodeString instantiations ----------------------------- ***
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerudemoUnicodeStringInit() {
518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // *** Make sure to read about invariant characters in utypes.h! ***
519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Initialization of Unicode strings from C literals works _only_ for
520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // invariant characters!
521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("\n* demoUnicodeStringInit() ---------- ***\n\n");
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // the string literal is 32 chars long - this must be counted for the macro
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString invariantOnly=UNICODE_STRING("such characters are safe 123 %-.", 32);
526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * In C, we need two macros: one to declare the UChar[] array, and
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * one to populate it; the second one is a noop on platforms where
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * wchar_t is compatible with UChar and ASCII-based.
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The length of the string literal must be counted for both macros.
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* declare the invString array for the string */
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_STRING_DECL(invString, "such characters are safe 123 %-.", 32);
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* populate it with the characters */
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_STRING_INIT(invString, "such characters are safe 123 %-.", 32);
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // compare the C and C++ strings
539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("C and C++ Unicode strings are equal: %d\n", invariantOnly==UnicodeString(TRUE, invString, 32));
540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * convert between char * and UChar * strings that
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * contain only invariant characters
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const char *cs1="such characters are safe 123 %-.";
546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static UChar us1[40];
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static char cs2[40];
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    u_charsToUChars(cs1, us1, 33); /* include the terminating NUL */
549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    u_UCharsToChars(us1, cs2, 33);
550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("char * -> UChar * -> char * with only "
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           "invariant characters: \"%s\"\n",
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           cs2);
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // initialize a UnicodeString from a string literal that contains
555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // escape sequences written with invariant characters
556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // do not forget to duplicate the backslashes for ICU to see them
557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // then, count each double backslash only once!
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString german=UNICODE_STRING(
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n", 64).
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        unescape();
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("german UnicodeString from unescaping:\n    ", german);
562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * C: convert and unescape a char * string with only invariant
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * characters to fill a UChar * string
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar buffer[200];
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t length;
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=u_unescape(
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n",
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buffer, LENGTHOF(buffer));
572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("german C Unicode string from char * unescaping: (length %d)\n    ", length);
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString("", UnicodeString(buffer));
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern int
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querumain(int argc, const char *argv[]) {
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Note: Using a global variable for any object is not exactly thread-safe...
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // You can change this call to e.g. ucnv_open("UTF-8", &errorCode) if you pipe
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // the output to a file and look at it with a Unicode-capable editor.
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // This will currently affect only the printUString() function, see the code above.
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // printUnicodeString() could use this, too, by changing to an extract() overload
586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // that takes a UConverter argument.
587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=ucnv_open(NULL, &errorCode);
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "error %s opening the default converter\n", u_errorName(errorCode));
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return errorCode;
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, NULL, NULL, &errorCode);
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "error %s setting the escape callback in the default converter\n", u_errorName(errorCode));
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_close(cnv);
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return errorCode;
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    demo_utf_h_macros();
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    demo_C_Unicode_strings();
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    demoCaseMapInC();
603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    demoCaseMapInCPlusPlus();
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    demoUnicodeStringStorage();
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    demoUnicodeStringInit();
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_close(cnv);
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0;
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
610