1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (C) 2000-2002, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: ustring.c 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2000aug15 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* This file contains sample code that illustrates the use of Unicode strings 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* with ICU. 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h> 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uchar.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/locid.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ustring.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// helper functions -------------------------------------------------------- *** 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// default converter for the platform encoding 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UConverter *cnv=NULL; 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruprintUString(const char *announce, const UChar *s, int32_t length) { 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static char out[200]; 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Convert to the "platform encoding". See notes in printUnicodeString(). 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ucnv_fromUChars(), like most ICU APIs understands length==-1 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to mean that the string is NUL-terminated. 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUChars(cnv, out, sizeof(out), s, length, &errorCode); 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode) || errorCode==U_STRING_NOT_TERMINATED_WARNING) { 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("%sproblem converting string from Unicode: %s\n", announce, u_errorName(errorCode)); 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("%s%s {", announce, out); 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the code points (not code units) */ 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>=0) { 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* s is not NUL-terminated */ 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; /* U16_NEXT post-increments */) { 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_NEXT(s, i, length, c); 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf(" %04x", c); 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* s is NUL-terminated */ 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; /* condition in loop body */; /* U16_NEXT post-increments */) { 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_NEXT(s, i, length, c); 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0) { 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf(" %04x", c); 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf(" }\n"); 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruprintUnicodeString(const char *announce, const UnicodeString &s) { 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static char out[200]; 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, length; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // output the string, converted to the platform encoding 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Note for Windows: The "platform encoding" defaults to the "ANSI codepage", 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // which is different from the "OEM codepage" in the console window. 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // However, if you pipe the output into a file and look at it with Notepad 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // or similar, then "ANSI" characters will show correctly. 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Production code should be aware of what encoding is required, 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // and use a UConverter or at least a charset name explicitly. 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru out[s.extract(0, 99, out)]=0; 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("%s%s {", announce, out); 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // output the code units (not code points) 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=s.length(); 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; ++i) { 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf(" %04x", s.charAt(i)); 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf(" }\n"); 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// sample code for utf.h macros -------------------------------------------- *** 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querudemo_utf_h_macros() { 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static UChar input[]={ 0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062 }; 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isError; 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n* demo_utf_h_macros() -------------- ***\n\n"); 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("iterate forward through: ", input, LENGTHOF(input)); 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<LENGTHOF(input); /* U16_NEXT post-increments */) { 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Iterating forwards 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Codepoint at offset 0: U+0061 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Codepoint at offset 1: U+10000 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Codepoint at offset 3: U+10ffff 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Codepoint at offset 5: U+0062 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("Codepoint at offset %d: U+", i); 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_NEXT(input, i, LENGTHOF(input), c); 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("%04x\n", c); 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru puts(""); 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isError=FALSE; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=1; /* write position, gets post-incremented so needs to be in an l-value */ 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_APPEND(input, i, LENGTHOF(input), 0x0062, isError); 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("iterate backward through: ", input, LENGTHOF(input)); 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=LENGTHOF(input); i>0; /* U16_PREV pre-decrements */) { 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_PREV(input, 0, i, c); 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Iterating backwards 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Codepoint at offset 5: U+0062 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Codepoint at offset 3: U+10ffff 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Codepoint at offset 2: U+dc00 -- unpaired surrogate because lead surr. overwritten 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Codepoint at offset 1: U+0062 -- by this BMP code point 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Codepoint at offset 0: U+0061 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("Codepoint at offset %d: U+%04x\n", i, c); 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// sample code for Unicode strings in C ------------------------------------ *** 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void demo_C_Unicode_strings() { 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n* demo_C_Unicode_strings() --------- ***\n\n"); 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const UChar text[]={ 0x41, 0x42, 0x43, 0 }; /* "ABC" */ 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const UChar appendText[]={ 0x61, 0x62, 0x63, 0 }; /* "abc" */ 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const UChar cmpText[]={ 0x61, 0x53, 0x73, 0x43, 0 }; /* "aSsC" */ 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar buffer[32]; 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t compare; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length=u_strlen(text); /* length=3 */ 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* simple ANSI C-style functions */ 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[0]=0; /* empty, NUL-terminated string */ 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_strncat(buffer, text, 1); /* append just n=1 character ('A') */ 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_strcat(buffer, appendText); /* buffer=="Aabc" */ 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=u_strlen(buffer); /* length=4 */ 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("should be \"Aabc\": ", buffer, -1); 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* bitwise comparing buffer with text */ 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru compare=u_strcmp(buffer, text); 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(compare<=0) { 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("String comparison error, expected \"Aabc\" > \"ABC\"\n"); 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Build "A<sharp s>C" in the buffer... */ 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_strcpy(buffer, text); 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[1]=0xdf; /* sharp s, case-compares equal to "ss" */ 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("should be \"A<sharp s>C\": ", buffer, -1); 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Compare two strings case-insensitively using full case folding */ 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru compare=u_strcasecmp(buffer, cmpText, U_FOLD_CASE_DEFAULT); 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(compare!=0) { 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("String case insensitive comparison error, expected \"AbC\" to be equal to \"ABC\"\n"); 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// sample code for case mappings with C APIs -------------------------------- *** 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void demoCaseMapInC() { 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * input= 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "aB<capital sigma>" 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "iI<small dotless i><capital dotted I> " 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "<sharp s> <small lig. ffi>" 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "<small final sigma><small sigma><capital sigma>" 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const UChar input[]={ 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x61, 0x42, 0x3a3, 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x69, 0x49, 0x131, 0x130, 0x20, 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0xdf, 0x20, 0xfb03, 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x3c2, 0x3c3, 0x3a3, 0 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar buffer[32]; 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode errorCode; 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j, length; 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isError; 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n* demoCaseMapInC() ----------------- ***\n\n"); 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * First, use simple case mapping functions which provide 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1:1 code point mappings without context/locale ID. 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note that some mappings will not be "right" because some "real" 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * case mappings require context, depend on the locale ID, 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and/or result in a change in the number of code points. 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("input string: ", input, -1); 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* uppercase */ 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isError=FALSE; 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=j=0; j<LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0) { 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* stop at terminating NUL, no need to terminate buffer */ 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=u_toupper(c); 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_APPEND(buffer, j, LENGTHOF(buffer), c, isError); 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("simple-uppercased: ", buffer, j); 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* lowercase */ 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isError=FALSE; 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=j=0; j<LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0) { 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* stop at terminating NUL, no need to terminate buffer */ 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=u_tolower(c); 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_APPEND(buffer, j, LENGTHOF(buffer), c, isError); 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("simple-lowercased: ", buffer, j); 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* titlecase */ 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isError=FALSE; 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=j=0; j<LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0) { 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* stop at terminating NUL, no need to terminate buffer */ 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=u_totitle(c); 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_APPEND(buffer, j, LENGTHOF(buffer), c, isError); 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("simple-titlecased: ", buffer, j); 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case-fold/default */ 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isError=FALSE; 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=j=0; j<LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0) { 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* stop at terminating NUL, no need to terminate buffer */ 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=u_foldCase(c, U_FOLD_CASE_DEFAULT); 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_APPEND(buffer, j, LENGTHOF(buffer), c, isError); 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("simple-case-folded/default: ", buffer, j); 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case-fold/Turkic */ 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isError=FALSE; 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=j=0; j<LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0) { 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* stop at terminating NUL, no need to terminate buffer */ 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I); 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_APPEND(buffer, j, LENGTHOF(buffer), c, isError); 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("simple-case-folded/Turkic: ", buffer, j); 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Second, use full case mapping functions which provide 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1:n code point mappings (n can be 0!) and are sensitive to context and locale ID. 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note that lower/upper/titlecasing take a locale ID while case-folding 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * has bit flag options instead, by design of the Unicode SpecialCasing.txt UCD file. 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Also, string titlecasing requires a BreakIterator to find starts of words. 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The sample code here passes in a NULL pointer; u_strToTitle() will open and close a default 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * titlecasing BreakIterator automatically. 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For production code where many strings are titlecased it would be more efficient 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to open a BreakIterator externally and pass it in. 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("\ninput string: ", input, -1); 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* lowercase/English */ 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=u_strToLower(buffer, LENGTHOF(buffer), input, -1, "en", &errorCode); 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("full-lowercased/en: ", buffer, length); 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("error in u_strToLower(en)=%ld error=%s\n", length, u_errorName(errorCode)); 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* lowercase/Turkish */ 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=u_strToLower(buffer, LENGTHOF(buffer), input, -1, "tr", &errorCode); 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("full-lowercased/tr: ", buffer, length); 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("error in u_strToLower(tr)=%ld error=%s\n", length, u_errorName(errorCode)); 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* uppercase/English */ 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=u_strToUpper(buffer, LENGTHOF(buffer), input, -1, "en", &errorCode); 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("full-uppercased/en: ", buffer, length); 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("error in u_strToUpper(en)=%ld error=%s\n", length, u_errorName(errorCode)); 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* uppercase/Turkish */ 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=u_strToUpper(buffer, LENGTHOF(buffer), input, -1, "tr", &errorCode); 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("full-uppercased/tr: ", buffer, length); 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("error in u_strToUpper(tr)=%ld error=%s\n", length, u_errorName(errorCode)); 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* titlecase/English */ 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=u_strToTitle(buffer, LENGTHOF(buffer), input, -1, NULL, "en", &errorCode); 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("full-titlecased/en: ", buffer, length); 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("error in u_strToTitle(en)=%ld error=%s\n", length, u_errorName(errorCode)); 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* titlecase/Turkish */ 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=u_strToTitle(buffer, LENGTHOF(buffer), input, -1, NULL, "tr", &errorCode); 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("full-titlecased/tr: ", buffer, length); 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("error in u_strToTitle(tr)=%ld error=%s\n", length, u_errorName(errorCode)); 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case-fold/default */ 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=u_strFoldCase(buffer, LENGTHOF(buffer), input, -1, U_FOLD_CASE_DEFAULT, &errorCode); 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("full-case-folded/default: ", buffer, length); 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("error in u_strFoldCase(default)=%ld error=%s\n", length, u_errorName(errorCode)); 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case-fold/Turkic */ 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=u_strFoldCase(buffer, LENGTHOF(buffer), input, -1, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUString("full-case-folded/Turkic: ", buffer, length); 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("error in u_strFoldCase(Turkic)=%ld error=%s\n", length, u_errorName(errorCode)); 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// sample code for case mappings with C++ APIs ------------------------------ *** 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void demoCaseMapInCPlusPlus() { 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * input= 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "aB<capital sigma>" 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "iI<small dotless i><capital dotted I> " 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "<sharp s> <small lig. ffi>" 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "<small final sigma><small sigma><capital sigma>" 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const UChar input[]={ 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x61, 0x42, 0x3a3, 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x69, 0x49, 0x131, 0x130, 0x20, 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0xdf, 0x20, 0xfb03, 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x3c2, 0x3c3, 0x3a3, 0 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n* demoCaseMapInCPlusPlus() --------- ***\n\n"); 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString s(input), t; 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const Locale &en=Locale::getEnglish(); 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Locale tr("tr"); 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Full case mappings as in demoCaseMapInC(), using UnicodeString functions. 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * These functions modify the string object itself. 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Since we want to keep the input string around, we copy it each time 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and case-map the copy. 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("input string: ", s); 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* lowercase/English */ 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("full-lowercased/en: ", (t=s).toLower(en)); 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* lowercase/Turkish */ 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("full-lowercased/tr: ", (t=s).toLower(tr)); 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* uppercase/English */ 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("full-uppercased/en: ", (t=s).toUpper(en)); 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* uppercase/Turkish */ 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("full-uppercased/tr: ", (t=s).toUpper(tr)); 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* titlecase/English */ 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("full-titlecased/en: ", (t=s).toTitle(NULL, en)); 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* titlecase/Turkish */ 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("full-titlecased/tr: ", (t=s).toTitle(NULL, tr)); 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case-folde/default */ 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("full-case-folded/default: ", (t=s).foldCase(U_FOLD_CASE_DEFAULT)); 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case-folde/Turkic */ 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("full-case-folded/Turkic: ", (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I)); 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// sample code for UnicodeString storage models ----------------------------- *** 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar readonly[]={ 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x61, 0x31, 0x20ac 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar writeable[]={ 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x62, 0x32, 0xdbc0, 0xdc01 // includes a surrogate pair for a supplementary code point 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic char out[100]; 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerudemoUnicodeStringStorage() { 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // These sample code lines illustrate how to use UnicodeString, and the 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // comments tell what happens internally. There are no APIs to observe 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // most of this programmatically, except for stepping into the code 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // with a debugger. 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This is by design to hide such details from the user. 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n* demoUnicodeStringStorage() ------- ***\n\n"); 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // * UnicodeString with internally stored contents 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // instantiate a UnicodeString from a single code point 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the few (2) UChars will be stored in the object itself 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString one((UChar32)0x24001); 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // this copies the few UChars into the "two" object 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString two=one; 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("length of short string copy: %d\n", two.length()); 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // set "one" to contain the 3 UChars from readonly 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // this setTo() variant copies the characters 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one.setTo(readonly, LENGTHOF(readonly)); 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // * UnicodeString with allocated contents 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // build a longer string that will not fit into the object's buffer 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one+=UnicodeString(writeable, LENGTHOF(writeable)); 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one+=one; 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one+=one; 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("length of longer string: %d\n", one.length()); 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // copying will use the same allocated buffer and increment the reference 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // counter 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru two=one; 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("length of longer string copy: %d\n", two.length()); 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // * UnicodeString using readonly-alias to a const UChar array 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // construct a string that aliases a readonly buffer 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString three(FALSE, readonly, LENGTHOF(readonly)); 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("readonly-alias string: ", three); 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // copy-on-write: any modification to the string results in 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // a copy to either the internal buffer or to a newly allocated one 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru three.setCharAt(1, 0x39); 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("readonly-aliasing string after modification: ", three); 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the aliased array is not modified 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<three.length(); ++i) { 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("readonly buffer[%d] after modifying its string: 0x%lx\n", 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i, readonly[i]); 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // setTo() readonly alias 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one.setTo(FALSE, writeable, LENGTHOF(writeable)); 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // copying the readonly-alias object with fastCopyFrom() (new in ICU 2.4) 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // will readonly-alias the same buffer 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru two.fastCopyFrom(one); 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("fastCopyFrom(readonly alias of \"writeable\" array): ", two); 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("verify that a fastCopyFrom(readonly alias) uses the same buffer pointer: %d (should be 1)\n", 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one.getBuffer()==two.getBuffer()); 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // a normal assignment will clone the contents (new in ICU 2.4) 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru two=one; 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("verify that a regular copy of a readonly alias uses a different buffer pointer: %d (should be 0)\n", 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one.getBuffer()==two.getBuffer()); 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // * UnicodeString using writeable-alias to a non-const UChar array 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString four(writeable, LENGTHOF(writeable), LENGTHOF(writeable)); 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("writeable-alias string: ", four); 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // a modification writes through to the buffer 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru four.setCharAt(1, 0x39); 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<four.length(); ++i) { 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("writeable-alias backing buffer[%d]=0x%lx " 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "after modification\n", i, writeable[i]); 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // a copy will not alias any more; 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // instead, it will get a copy of the contents into allocated memory 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru two=four; 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru two.setCharAt(1, 0x21); 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<two.length(); ++i) { 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("writeable-alias backing buffer[%d]=0x%lx after " 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "modification of string copy\n", i, writeable[i]); 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // setTo() writeable alias, capacity==length 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one.setTo(writeable, LENGTHOF(writeable), LENGTHOF(writeable)); 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // grow the string - it will not fit into the backing buffer any more 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // and will get copied before modification 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one.append((UChar)0x40); 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // shrink it back so it would fit 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one.truncate(one.length()-1); 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // we still operate on the copy 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one.setCharAt(1, 0x25); 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("string after growing too much and then shrinking[1]=0x%lx\n" 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru " backing store for this[1]=0x%lx\n", 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one.charAt(1), writeable[1]); 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if we need it in the original buffer, then extract() to it 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // extract() does not do anything if the string aliases that same buffer 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // i=min(one.length(), length of array) 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(one.length()<LENGTHOF(writeable)) { 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=one.length(); 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=LENGTHOF(writeable); 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru one.extract(0, i, writeable); 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<LENGTHOF(writeable); ++i) { 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("writeable-alias backing buffer[%d]=0x%lx after re-extract\n", 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i, writeable[i]); 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// sample code for UnicodeString instantiations ----------------------------- *** 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerudemoUnicodeStringInit() { 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // *** Make sure to read about invariant characters in utypes.h! *** 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Initialization of Unicode strings from C literals works _only_ for 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // invariant characters! 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n* demoUnicodeStringInit() ---------- ***\n\n"); 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the string literal is 32 chars long - this must be counted for the macro 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString invariantOnly=UNICODE_STRING("such characters are safe 123 %-.", 32); 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In C, we need two macros: one to declare the UChar[] array, and 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * one to populate it; the second one is a noop on platforms where 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * wchar_t is compatible with UChar and ASCII-based. 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The length of the string literal must be counted for both macros. 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* declare the invString array for the string */ 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_STRING_DECL(invString, "such characters are safe 123 %-.", 32); 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* populate it with the characters */ 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_STRING_INIT(invString, "such characters are safe 123 %-.", 32); 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // compare the C and C++ strings 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("C and C++ Unicode strings are equal: %d\n", invariantOnly==UnicodeString(TRUE, invString, 32)); 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * convert between char * and UChar * strings that 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * contain only invariant characters 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const char *cs1="such characters are safe 123 %-."; 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static UChar us1[40]; 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static char cs2[40]; 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_charsToUChars(cs1, us1, 33); /* include the terminating NUL */ 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_UCharsToChars(us1, cs2, 33); 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("char * -> UChar * -> char * with only " 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "invariant characters: \"%s\"\n", 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs2); 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // initialize a UnicodeString from a string literal that contains 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // escape sequences written with invariant characters 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // do not forget to duplicate the backslashes for ICU to see them 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // then, count each double backslash only once! 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString german=UNICODE_STRING( 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n", 64). 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unescape(); 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("german UnicodeString from unescaping:\n ", german); 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * C: convert and unescape a char * string with only invariant 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * characters to fill a UChar * string 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar buffer[200]; 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=u_unescape( 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n", 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer, LENGTHOF(buffer)); 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("german C Unicode string from char * unescaping: (length %d)\n ", length); 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printUnicodeString("", UnicodeString(buffer)); 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern int 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querumain(int argc, const char *argv[]) { 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Note: Using a global variable for any object is not exactly thread-safe... 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // You can change this call to e.g. ucnv_open("UTF-8", &errorCode) if you pipe 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the output to a file and look at it with a Unicode-capable editor. 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This will currently affect only the printUString() function, see the code above. 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // printUnicodeString() could use this, too, by changing to an extract() overload 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // that takes a UConverter argument. 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=ucnv_open(NULL, &errorCode); 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error %s opening the default converter\n", u_errorName(errorCode)); 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return errorCode; 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, NULL, NULL, &errorCode); 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error %s setting the escape callback in the default converter\n", u_errorName(errorCode)); 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(cnv); 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return errorCode; 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demo_utf_h_macros(); 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demo_C_Unicode_strings(); 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demoCaseMapInC(); 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demoCaseMapInCPlusPlus(); 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demoUnicodeStringStorage(); 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demoUnicodeStringInit(); 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(cnv); 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 610