/**************************************************************************
*
*   Copyright (C) 2000-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
***************************************************************************
*   file name:  convsamp.c
*   encoding:   ASCII (7-bit)
*
*   created on: 2000may30
*   created by: Steven R. Loomis
*
*   Sample code for the ICU conversion routines.
*
* Note: Nothing special is needed to build this sample. Link with
*       the icu UC and icu I18N libraries.
*
*       I use 'assert' for error checking, you probably will want
*       something more flexible.  '***BEGIN SAMPLE***' and
*       '***END SAMPLE***' mark pieces suitable for stand alone
*       code snippets.
*
*
*  Each test can define its own BUFFERSIZE
*
*/
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define DEBUG_TMI 0  /* define to 1 to enable Too Much Information */
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <ctype.h>            /* for isspace, etc.    */
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <assert.h>
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h>
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h>  /* malloc */
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"   /* Basic ICU data types */
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"     /* C   Converter API    */
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"  /* some more string fcns*/
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"    /* char names           */
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uloc.h"
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h"
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "flagcb.h"
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Some utility functions */
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar kNone[] = { 0x0000 };
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define U_ASSERT(x)  { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Print a UChar if possible, in seven characters. */
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid prettyPrintUChar(UChar c)
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(  (c <= 0x007F) &&
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       (isgraph(c))  ) {
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf(" '%c'   ", (char)(0x00FF&c));
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  } else if ( c > 0x007F ) {
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char buf[1000];
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t o;
6183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
6283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    o = u_charName(c, U_EXTENDED_CHAR_NAME, buf, 1000, &status);
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(status) && (o>0) ) {
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      buf[6] = 0;
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      printf("%7s", buf);
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
6783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius      printf(" ??????");
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  } else {
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch((char)(c & 0x007F)) {
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case ' ':
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      printf(" ' '   ");
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      break;
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case '\t':
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      printf(" \\t    ");
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      break;
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case '\n':
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      printf(" \\n    ");
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      break;
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      printf("  _    ");
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      break;
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid printUChars(const char  *name = "?",
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 const UChar *uch  = kNone,
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 int32_t     len   = -1 )
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t i;
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if( (len == -1) && (uch) ) {
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len = u_strlen(uch);
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("%5s: ", name);
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  for( i = 0; i <len; i++) {
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("%-6d ", i);
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("%5s: ", "uni");
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  for( i = 0; i <len; i++) {
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("\\u%04X ", (int)uch[i]);
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("%5s:", "ch");
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  for( i = 0; i <len; i++) {
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    prettyPrintUChar(uch[i]);
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Dump a byte string to stdout as three column-aligned rows:
 *   <name>: the index of every byte,
 *     uni : every byte as \xXX,
 *      ch : the byte in single quotes when isgraph() accepts it,
 *           otherwise blanks of the same width.
 *
 * name - label printed in front of the index row.
 * uch  - the bytes to print.
 * len  - number of bytes; when it is -1 and uch is not NULL the length
 *        is taken from strlen(uch).
 */
void printBytes(const char  *name = "?",
                 const char *uch  = "",
                 int32_t     len   = -1 )
{
  if( uch && (len == -1) ) {
    len = strlen(uch);
  }

  int32_t pos;

  /* row 1: indices */
  printf("%5s: ", name);
  for( pos = 0; pos < len; ++pos ) {
    printf("%-4d ", pos);
  }
  printf("\n");

  /* row 2: byte values in hex, masked so that only the low 8 bits show */
  printf("%5s: ", "uni");
  for( pos = 0; pos < len; ++pos ) {
    printf("\\x%02X ", 0x00FF & (int)uch[pos]);
  }
  printf("\n");

  /* row 3: printable bytes, blanks for everything else */
  printf("%5s:", "ch");
  for( pos = 0; pos < len; ++pos ) {
    if( !isgraph(0x00FF & (int)uch[pos]) ) {
      printf("     ");
      continue;
    }
    printf(" '%c' ", (char)uch[pos]);
  }
  printf("\n");
}
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid printUChar(UChar32 ch32)
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(ch32 > 0xFFFF) {
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      printf("ch: U+%06X\n", ch32);
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UChar ch = (UChar)ch32;
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      printUChars("C", &ch, 1);
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*******************************************************************
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  followed by an exclamation mark (!) into the KOI8-R Russian code page.
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  This example first creates a UChar String out of the Unicode chars.
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  targetSize must be set to the amount of space available in the target
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  buffer. After fromUChars is called,
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  len will contain the number of bytes in target[] which were
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  used in the resulting codepage.  In this case, there is a 1:1 mapping
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  between the input and output characters. The exclamation mark has the
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  same value in both KOI8-R and Unicode.
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  src: 0      1      2      3      4      5      6
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL   '!'
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru targ:  0    1    2    3    4    5    6
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   ch:                                '!'
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruConverting FROM unicode
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  to koi8-r.
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  You must call ucnv_close to clean up the memory used by the
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  converter.
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  'len' returns the number of OUTPUT bytes resulting from the
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conversion.
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUErrorCode convsample_02()
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n\n==============================================\n"
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         "Sample 02: C: simple Unicode -> koi8-r conversion\n");
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // **************************** START SAMPLE *******************
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // "cat<cat>OK"
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     0x0430, 0x0021, 0x0000 };
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  char target[100];
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UConverter *conv;
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t     len;
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // set up the converter
20883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  //! [ucnv_open]
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_open("koi8-r", &status);
21083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  //! [ucnv_open]
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  assert(U_SUCCESS(status));
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // convert to koi8-r
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  len = ucnv_fromUChars(conv, target, 100, source, -1, &status);
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  assert(U_SUCCESS(status));
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // close the converter
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // ***************************** END SAMPLE ********************
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Print it out
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printUChars("src", source);
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printBytes("targ", target, len);
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return U_ZERO_ERROR;
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUErrorCode convsample_03()
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n\n==============================================\n"
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         "Sample 03: C: print out all converters\n");
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t count;
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t i;
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // **************************** START SAMPLE *******************
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  count = ucnv_countAvailable();
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("Available converters: %d\n", count);
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  for(i=0;i<count;i++)
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("%s ", ucnv_getAvailableName(i));
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // ***************************** END SAMPLE ********************
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return U_ZERO_ERROR;
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BUFFERSIZE 17 /* make it interesting :) */
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Converting from a codepage to Unicode in bulk..
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  What is the best way to determine the buffer size?
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     The 'buffersize' is in bytes of input.
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    For a given converter, divinding this by the minimum char size
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    give you the maximum number of Unicode characters that could be
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expected for a given number of input bytes.
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     see: ucnv_getMinCharSize()
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     For example, a single byte codepage like 'Latin-3' has a
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    minimum char size of 1. (It takes at least 1 byte to represent
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    each Unicode char.) So the unicode buffer has the same number of
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChars as the input buffer has bytes.
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     In a strictly double byte codepage such as cp1362 (Windows
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Korean), the minimum char size is 2. So, only half as many Unicode
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    chars as bytes are needed.
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     This work to calculate the buffer size is an optimization. Any
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    size of input and output buffer can be used, as long as the
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    program handles the following cases: If the input buffer is empty,
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    the source pointer will be equal to sourceLimit.  If the output
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUErrorCode convsample_05()
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n\n==============================================\n"
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         "Sample 05: C: count the number of letters in a UTF-8 document\n");
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  FILE *f;
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t count;
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  char inBuf[BUFFERSIZE];
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *source;
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *sourceLimit;
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar *uBuf;
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar *target;
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar *targetLimit;
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar *p;
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t uBufSize = 0;
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UConverter *conv;
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t letters=0, total=0;
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  f = fopen("data01.txt", "r");
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(!f)
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fprintf(stderr, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return U_FILE_ACCESS_ERROR;
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // **************************** START SAMPLE *******************
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_open("utf-8", &status);
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  assert(U_SUCCESS(status));
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("input bytes %d / min chars %d = %d UChars\n",
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  assert(uBuf!=NULL);
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // grab another buffer's worth
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  while((!feof(f)) &&
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert bytes to unicode
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    source = inBuf;
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sourceLimit = inBuf + count;
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        target = uBuf;
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        targetLimit = uBuf + uBufSize;
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_toUnicode(conv, &target, targetLimit,
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       &source, sourceLimit, NULL,
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       feof(f)?TRUE:FALSE,         /* pass 'flush' when eof */
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   /* is true (when no more data will come) */
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       &status);
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(status == U_BUFFER_OVERFLOW_ERROR)
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // simply ran out of space - we'll reset the target ptr the next
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // time through the loop.
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          status = U_ZERO_ERROR;
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          //  Check other errors here.
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          assert(U_SUCCESS(status));
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // Break out of the loop (by force)
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Process the Unicode
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Todo: handle UTF-16/surrogates
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(p = uBuf; p<target; p++)
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          if(u_isalpha(*p))
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            letters++;
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          total++;
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while (source < sourceLimit); // while simply out of space
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("%d letters out of %d total UChars.\n", letters, total);
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // ***************************** END SAMPLE ********************
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  fclose(f);
37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return U_ZERO_ERROR;
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#undef BUFFERSIZE
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BUFFERSIZE 1024
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32  codepoint;
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t frequency;
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} CharFreqInfo;
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUErrorCode convsample_06()
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n\n==============================================\n"
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  FILE *f;
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t count;
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  char inBuf[BUFFERSIZE];
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *source;
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *sourceLimit;
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t uBufSize = 0;
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UConverter *conv;
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t letters=0, total=0;
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  CharFreqInfo   *info;
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32   charCount = 0x10000;  /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32   p;
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t ie = 0;
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t gh = 0;
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32 l = 0;
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  f = fopen("data06.txt", "r");
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(!f)
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fprintf(stderr, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return U_FILE_ACCESS_ERROR;
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(!info)
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo)*charCount);
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* reset frequencies */
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  for(p=0;p<charCount;p++)
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    info[p].codepoint = p;
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    info[p].frequency = 0;
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // **************************** START SAMPLE *******************
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_open("utf-8", &status);
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  assert(U_SUCCESS(status));
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("input bytes %d / min chars %d = %d UChars\n",
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // grab another buffer's worth
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  while((!feof(f)) &&
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert bytes to unicode
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    source = inBuf;
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sourceLimit = inBuf + count;
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(source < sourceLimit)
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      if(U_FAILURE(status))
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      {
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ZERO_ERROR;
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        continue;
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      }
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      U_ASSERT(status);
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      total++;
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      if(u_isalpha(p))
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        letters++;
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ie++;
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        gh++;
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      if(p>charCount)
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      {
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
468b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        free(info);
469b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        fclose(f);
470b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ucnv_close(conv);
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return U_UNSUPPORTED_ERROR;
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      }
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      info[p].frequency++;
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      l = p;
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  fclose(f);
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("%d letters out of %d total UChars.\n", letters, total);
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // now, we could sort it..
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //  qsort(info, charCount, sizeof(info[0]), charfreq_compare);
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  for(p=0;p<charCount;p++)
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(info[p].frequency)
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      printf("% 5d U+%06X ", info[p].frequency, p);
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      if(p <= 0xFFFF)
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      {
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        prettyPrintUChar((UChar)p);
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      }
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      printf("\n");
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  free(info);
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // ***************************** END SAMPLE ********************
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return U_ZERO_ERROR;
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#undef BUFFERSIZE
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/******************************************************
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  You must call ucnv_close to clean up the memory used by the
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  converter.
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  'len' returns the number of OUTPUT bytes resulting from the
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conversion.
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUErrorCode convsample_12()
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n\n==============================================\n"
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         "Sample 12: C: simple sjis -> unicode conversion\n");
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // **************************** START SAMPLE *******************
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar target[100];
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UConverter *conv;
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t     len;
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // set up the converter
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_open("shift_jis", &status);
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  assert(U_SUCCESS(status));
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // convert to Unicode
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Note: we can use strlen, we know it's an 8 bit null terminated codepage
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  target[6] = 0xFDCA;
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  len = ucnv_toUChars(conv, target, 100, source, strlen(source), &status);
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // close the converter
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // ***************************** END SAMPLE ********************
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Print it out
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printBytes("src", source, strlen(source) );
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printUChars("targ", target, len);
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return U_ZERO_ERROR;
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/******************************************************************
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   C: Convert from codepage to Unicode one at a time.
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUErrorCode convsample_13()
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n\n==============================================\n"
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char sourceChars[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //  const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *source, *sourceLimit;
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32 target;
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UConverter *conv = NULL;
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t srcCount=0;
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t dstCount=0;
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  srcCount = sizeof(sourceChars);
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_open("Big5", &status);
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  source = sourceChars;
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  sourceLimit = sourceChars + sizeof(sourceChars);
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // **************************** START SAMPLE *******************
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printBytes("src",source,sourceLimit-source);
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  while(source < sourceLimit)
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    puts("");
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    target = ucnv_getNextUChar (conv,
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                &source,
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                sourceLimit,
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                &status);
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    printBytes("src",source,sourceLimit-source);
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ASSERT(status);
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printUChar(target);
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dstCount++;
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // ************************** END SAMPLE *************************
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("src=%d bytes, dst=%d uchars\n", srcCount, dstCount);
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return U_ZERO_ERROR;
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool convsample_20_didSubstitute(const char *source)
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar uchars[100];
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  char bytes[100];
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UConverter *conv = NULL;
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t len, len2;
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool  flagVal;
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  FromUFLAGContext * context = NULL;
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n\n==============================================\n"
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         "Sample 20: C: Test for substitution using callbacks\n");
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* print out the original source */
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printBytes("src", source);
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* First, convert from UTF8 to unicode */
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_open("utf-8", &status);
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printUChars("uch", uchars, len);
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* Now, close the converter */
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* Now, convert to windows-1252 */
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_open("windows-1252", &status);
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* Converter starts out with the SUBSTITUTE callback set. */
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* initialize our callback */
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  context = flagCB_fromU_openContext();
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* Set our special callback */
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_setFromUCallBack(conv,
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        flagCB_fromU,
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        context,
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        &(context->subCallback),
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        &(context->subContext),
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        &status);
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  flagVal = context->flag;  /* it's about to go away when we close the cnv */
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* print out the original source */
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printBytes("bytes", bytes, len2);
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return flagVal; /* true if callback was called */
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUErrorCode convsample_20()
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *sample1 = "abc\xdf\xbf";
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *sample2 = "abc_def";
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(convsample_20_didSubstitute(sample1))
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("DID substitute.\n******\n");
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  else
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("Did NOT substitute.\n*****\n");
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(convsample_20_didSubstitute(sample2))
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("DID substitute.\n******\n");
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  else
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("Did NOT substitute.\n*****\n");
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return U_ZERO_ERROR;
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 21  - C, callback, with clone and debug
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool convsample_21_didSubstitute(const char *source)
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar uchars[100];
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  char bytes[100];
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UConverter *conv = NULL, *cloneCnv = NULL;
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t len, len2;
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t  cloneLen;
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool  flagVal = FALSE;
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UConverterFromUCallback junkCB;
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  FromUFLAGContext *flagCtx = NULL,
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   *cloneFlagCtx = NULL;
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  debugCBContext   *debugCtx1 = NULL,
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   *debugCtx2 = NULL,
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   *cloneDebugCtx = NULL;
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n\n==============================================\n"
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         "Sample 21: C: Test for substitution w/ callbacks & clones \n");
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* print out the original source */
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printBytes("src", source);
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* First, convert from UTF8 to unicode */
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_open("utf-8", &status);
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printUChars("uch", uchars, len);
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* Now, close the converter */
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* Now, convert to windows-1252 */
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_open("windows-1252", &status);
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* Converter starts out with the SUBSTITUTE callback set. */
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* initialize our callback */
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* from the 'bottom' innermost, out
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   CNV ->  debugCtx1[debug]  ->  flagCtx[flag] -> debugCtx2[debug]  */
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if DEBUG_TMI
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("flagCB_fromU = %p\n", &flagCB_fromU);
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("debugCB_fromU = %p\n", &debugCB_fromU);
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  debugCtx1 = debugCB_openContext();
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   flagCtx  = flagCB_fromU_openContext();
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  debugCtx2 = debugCB_openContext();
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  debugCtx1->subCallback =  flagCB_fromU;  /* debug1 -> flag */
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  debugCtx1->subContext  =  flagCtx;
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  flagCtx->subCallback   =  debugCB_fromU; /*  flag -> debug2 */
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  flagCtx->subContext    =  debugCtx2;
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  debugCtx2->subCallback =  UCNV_FROM_U_CALLBACK_SUBSTITUTE;
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  debugCtx2->subContext  = NULL;
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* Set our special callback */
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_setFromUCallBack(conv,
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        debugCB_fromU,
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        debugCtx1,
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        &(debugCtx2->subCallback),
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        &(debugCtx2->subContext),
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        &status);
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if DEBUG_TMI
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         conv, debugCtx1, debugCtx1->subCallback,
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback);
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
78959d709d503bab6e2b61931737e662dd293b40578ccornelius  cloneCnv = ucnv_safeClone(conv, NULL, NULL, &status);
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if DEBUG_TMI
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("Cloned converter from %p -> %p.  Closing %p.\n", conv, cloneCnv, conv);
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if DEBUG_TMI
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("%p closed.\n", conv);
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* Now, we have to extract the context */
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  cloneDebugCtx = NULL;
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  cloneFlagCtx  = NULL;
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx);
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(cloneDebugCtx != NULL) {
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext;
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:NULL );
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status);
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  U_ASSERT(status);
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(cloneFlagCtx != NULL) {
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      flagVal = cloneFlagCtx->flag;  /* it's about to go away when we close the cnv */
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  } else {
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      printf("** Warning, couldn't get the subcallback \n");
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(cloneCnv);
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /* print out the original source */
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printBytes("bytes", bytes, len2);
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return flagVal; /* true if callback was called */
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUErrorCode convsample_21()
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *sample1 = "abc\xdf\xbf";
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *sample2 = "abc_def";
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(convsample_21_didSubstitute(sample1))
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("DID substitute.\n******\n");
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  else
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("Did NOT substitute.\n*****\n");
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(convsample_21_didSubstitute(sample2))
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("DID substitute.\n******\n");
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  else
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("Did NOT substitute.\n*****\n");
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return U_ZERO_ERROR;
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  40-  C, cp37 -> UTF16 [data02.bin -> data40.utf16]
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BUFFERSIZE 17 /* make it interesting :) */
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUErrorCode convsample_40()
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n\n==============================================\n"
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  FILE *f;
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  FILE *out;
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t count;
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  char inBuf[BUFFERSIZE];
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *source;
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const char *sourceLimit;
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar *uBuf;
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar *target;
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar *targetLimit;
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t uBufSize = 0;
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UConverter *conv = NULL;
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t inbytes=0, total=0;
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  f = fopen("data02.bin", "rb");
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(!f)
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fprintf(stderr, "Couldn't open file 'data02.bin' (cp37 data file).\n");
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return U_FILE_ACCESS_ERROR;
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  out = fopen("data40.utf16", "wb");
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(!out)
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fprintf(stderr, "Couldn't create file 'data40.utf16'.\n");
89450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fclose(f);
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return U_FILE_ACCESS_ERROR;
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // **************************** START SAMPLE *******************
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_openCCSID(37, UCNV_IBM, &status);
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  assert(U_SUCCESS(status));
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("input bytes %d / min chars %d = %d UChars\n",
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  assert(uBuf!=NULL);
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // grab another buffer's worth
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  while((!feof(f)) &&
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    inbytes += count;
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert bytes to unicode
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    source = inBuf;
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sourceLimit = inBuf + count;
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        target = uBuf;
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        targetLimit = uBuf + uBufSize;
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_toUnicode( conv, &target, targetLimit,
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       &source, sourceLimit, NULL,
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       feof(f)?TRUE:FALSE,         /* pass 'flush' when eof */
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   /* is true (when no more data will come) */
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         &status);
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(status == U_BUFFER_OVERFLOW_ERROR)
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // simply ran out of space - we'll reset the target ptr the next
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // time through the loop.
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          status = U_ZERO_ERROR;
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          //  Check other errors here.
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          assert(U_SUCCESS(status));
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // Break out of the loop (by force)
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Process the Unicode
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Todo: handle UTF-16/surrogates
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               (size_t)(target-uBuf));
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        total += (target-uBuf);
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while (source < sourceLimit); // while simply out of space
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("%d bytes in,  %d UChars out.\n", inbytes, total);
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // ***************************** END SAMPLE ********************
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  fclose(f);
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  fclose(out);
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return U_ZERO_ERROR;
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#undef BUFFERSIZE
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  46-  C, UTF16 -> latin2 [data40.utf16 -> data46.out]
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BUFFERSIZE 24 /* make it interesting :) */
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUErrorCode convsample_46()
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n\n==============================================\n"
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  FILE *f;
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  FILE *out;
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t count;
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar inBuf[BUFFERSIZE];
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const UChar *source;
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  const UChar *sourceLimit;
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  char *buf;
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  char *target;
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  char *targetLimit;
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t bufSize = 0;
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UConverter *conv = NULL;
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UErrorCode status = U_ZERO_ERROR;
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  uint32_t inchars=0, total=0;
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  f = fopen("data40.utf16", "rb");
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(!f)
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fprintf(stderr, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return U_FILE_ACCESS_ERROR;
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  out = fopen("data46.out", "wb");
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  if(!out)
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fprintf(stderr, "Couldn't create file 'data46.out'.\n");
100050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fclose(f);
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return U_FILE_ACCESS_ERROR;
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // **************************** START SAMPLE *******************
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  conv = ucnv_open( "iso-8859-2", &status);
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  assert(U_SUCCESS(status));
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  bufSize = (BUFFERSIZE*ucnv_getMaxCharSize(conv));
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         BUFFERSIZE, ucnv_getMaxCharSize(conv), bufSize);
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  buf = (char*)malloc(bufSize * sizeof(char));
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  assert(buf!=NULL);
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // grab another buffer's worth
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  while((!feof(f)) &&
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) )
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  {
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    inchars += count;
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert bytes to unicode
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    source = inBuf;
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sourceLimit = inBuf + count;
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        target = buf;
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        targetLimit = buf + bufSize;
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_fromUnicode( conv, &target, targetLimit,
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       &source, sourceLimit, NULL,
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       feof(f)?TRUE:FALSE,         /* pass 'flush' when eof */
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   /* is true (when no more data will come) */
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         &status);
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(status == U_BUFFER_OVERFLOW_ERROR)
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // simply ran out of space - we'll reset the target ptr the next
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // time through the loop.
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          status = U_ZERO_ERROR;
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          //  Check other errors here.
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          assert(U_SUCCESS(status));
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          // Break out of the loop (by force)
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Process the Unicode
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) ==
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               (size_t)(target-buf));
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        total += (target-buf);
1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while (source < sourceLimit); // while simply out of space
1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  }
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total);
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // ***************************** END SAMPLE ********************
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  ucnv_close(conv);
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  fclose(f);
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  fclose(out);
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  printf("\n");
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return U_ZERO_ERROR;
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#undef BUFFERSIZE
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BUFFERSIZE 219
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
107083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusvoid convsample_50() {
107183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  printf("\n\n==============================================\n"
107283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius         "Sample 50: C: ucnv_detectUnicodeSignature\n");
107383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
107483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  //! [ucnv_detectUnicodeSignature]
107583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  UErrorCode err = U_ZERO_ERROR;
107683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  UBool discardSignature = TRUE; /* set to TRUE to throw away the initial U+FEFF */
107783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
107883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  int32_t signatureLength = 0;
107983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  const char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err);
108083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  UConverter *conv = NULL;
108183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  UChar output[100];
108283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  UChar *target = output, *out;
108383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  const char *source = input;
108483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  if(encoding!=NULL && U_SUCCESS(err)){
108583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    // should signature be discarded ?
108683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    conv = ucnv_open(encoding, &err);
108783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    // do the conversion
108883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    ucnv_toUnicode(conv,
108983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                   &target, output + sizeof(output)/U_SIZEOF_UCHAR,
109083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                   &source, input + sizeof(input),
109183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                   NULL, TRUE, &err);
109283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    out = output;
109383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    if (discardSignature){
109483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius      ++out; // ignore initial U+FEFF
109583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    }
109683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    while(out != target) {
109783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius      printf("%04x ", *out++);
109883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    }
109983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    puts("");
110083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  }
110183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  //! [ucnv_detectUnicodeSignature]
110283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius  puts("");
110383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
110483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
110583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* main */
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Program entry point for the converter samples.
 *
 * Prints the name of the default converter, runs every sample routine once
 * in a fixed order (whatever the samples return is not inspected here),
 * prints a closing banner, flushes the standard streams and returns 0.
 */
int main()
{
  const char *defaultConverterName = ucnv_getDefaultName();

  printf("Default Converter=%s\n", defaultConverterName);

  /* Basic C API samples. */
  convsample_02();  /* Unicode -> KOI8-R, buffer conversion */
  convsample_03();  /* iteration */

  convsample_05();  /* UTF-8 -> Unicode via getNextUChar */
  convsample_06();  /* frequency counter */

  /* Multi-byte code pages -> Unicode. */
  convsample_12();  /* Shift-JIS -> Unicode, buffer conversion */
  convsample_13();  /* Big5 -> Unicode via getNextUChar */

  /* Converter callbacks. */
  convsample_20();  /* callback */
  convsample_21();  /* callback, debug variant */

  /* File-to-file conversions. */
  convsample_40();  /* cp37 -> UTF-16 [data02.bin -> data40.utf16] */

  convsample_46();  /* UTF-16 -> Latin-3 [data41.utf16 -> data46.out] */

  /* Unicode signature (BOM) detection. */
  convsample_50();

  printf("End of converter samples.\n");

  /* Push out anything still buffered before the process exits. */
  fflush(stdout);
  fflush(stderr);

  return 0;
}
1139