1/**************************************************************************
2*
3*   Copyright (C) 2000-2011, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*
6***************************************************************************
7*   file name:  convsamp.c
8*   encoding:   ASCII (7-bit)
9*
10*   created on: 2000may30
11*   created by: Steven R. Loomis
12*
13*   Sample code for the ICU conversion routines.
14*
15* Note: Nothing special is needed to build this sample. Link with
16*       the icu UC and icu I18N libraries.
17*
18*       I use 'assert' for error checking, you probably will want
19*       something more flexible.  '***BEGIN SAMPLE***' and
20*       '***END SAMPLE***' mark pieces suitable for stand alone
21*       code snippets.
22*
23*
*  Each test can define its own BUFFERSIZE
25*
26*/
27
28#define DEBUG_TMI 0  /* define to 1 to enable Too Much Information */
29
30#include <stdio.h>
31#include <ctype.h>            /* for isspace, etc.    */
32#include <assert.h>
33#include <string.h>
34#include <stdlib.h>  /* malloc */
35
36#include "unicode/utypes.h"   /* Basic ICU data types */
37#include "unicode/ucnv.h"     /* C   Converter API    */
38#include "unicode/ustring.h"  /* some more string fcns*/
39#include "unicode/uchar.h"    /* char names           */
40#include "unicode/uloc.h"
41#include "unicode/unistr.h"
42
43#include "flagcb.h"
44
45/* Some utility functions */
46
47static const UChar kNone[] = { 0x0000 };
48
49#define U_ASSERT(x)  { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
50
51/* Print a UChar if possible, in seven characters. */
52void prettyPrintUChar(UChar c)
53{
54  if(  (c <= 0x007F) &&
55       (isgraph(c))  ) {
56    printf(" '%c'   ", (char)(0x00FF&c));
57  } else if ( c > 0x007F ) {
58    char buf[1000];
59    UErrorCode status = U_ZERO_ERROR;
60    int32_t o;
61
62    o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 1000, &status);
63    if(U_SUCCESS(status) && (o>0) ) {
64      buf[6] = 0;
65      printf("%7s", buf);
66    } else {
67      o = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, 1000, &status);
68      if(U_SUCCESS(status) && (o>0)) {
69        buf[5] = 0;
70        printf("~%6s", buf);
71      }
72      else {
73        printf(" ??????");
74      }
75    }
76  } else {
77    switch((char)(c & 0x007F)) {
78    case ' ':
79      printf(" ' '   ");
80      break;
81    case '\t':
82      printf(" \\t    ");
83      break;
84    case '\n':
85      printf(" \\n    ");
86      break;
87    default:
88      printf("  _    ");
89      break;
90    }
91  }
92}
93
94
95void printUChars(const char  *name = "?",
96                 const UChar *uch  = kNone,
97                 int32_t     len   = -1 )
98{
99  int32_t i;
100
101  if( (len == -1) && (uch) ) {
102    len = u_strlen(uch);
103  }
104
105  printf("%5s: ", name);
106  for( i = 0; i <len; i++) {
107    printf("%-6d ", i);
108  }
109  printf("\n");
110
111  printf("%5s: ", "uni");
112  for( i = 0; i <len; i++) {
113    printf("\\u%04X ", (int)uch[i]);
114  }
115  printf("\n");
116
117  printf("%5s:", "ch");
118  for( i = 0; i <len; i++) {
119    prettyPrintUChar(uch[i]);
120  }
121  printf("\n");
122}
123
/*
 * Dump a byte string as three aligned rows: element index, \xXX value,
 * and the character itself when it is graphic (blank otherwise).
 *
 *   name  label printed at the start of the index row
 *   uch   the bytes to dump
 *   len   number of bytes; -1 means "uch is NUL-terminated, measure it"
 */
void printBytes(const char  *name = "?",
                 const char *uch  = "",
                 int32_t     len   = -1 )
{
  int32_t pos;

  /* Resolve the "NUL-terminated" convention to a real length. */
  if( uch && (len == -1) ) {
    len = strlen(uch);
  }

  /* Row 1: indices */
  printf("%5s: ", name);
  pos = 0;
  while( pos < len ) {
    printf("%-4d ", pos);
    ++pos;
  }
  printf("\n");

  /* Row 2: byte values, masked so that negative chars print as two digits */
  printf("%5s: ", "uni");
  pos = 0;
  while( pos < len ) {
    printf("\\x%02X ", 0x00FF & (int)uch[pos]);
    ++pos;
  }
  printf("\n");

  /* Row 3: the byte as a character, or padding when it is not graphic */
  printf("%5s:", "ch");
  pos = 0;
  while( pos < len ) {
    if(!isgraph(0x00FF & (int)uch[pos])) {
      printf("     ");
    } else {
      printf(" '%c' ", (char)uch[pos]);
    }
    ++pos;
  }
  printf("\n");
}
156
157void printUChar(UChar32 ch32)
158{
159    if(ch32 > 0xFFFF) {
160      printf("ch: U+%06X\n", ch32);
161    }
162    else {
163      UChar ch = (UChar)ch32;
164      printUChars("C", &ch, 1);
165    }
166}
167
168/*******************************************************************
169  Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
170  followed by an exclamation mark (!) into the KOI8-R Russian code page.
171
172  This example first creates a UChar String out of the Unicode chars.
173
174  targetSize must be set to the amount of space available in the target
175  buffer. After fromUChars is called,
176  len will contain the number of bytes in target[] which were
177  used in the resulting codepage.  In this case, there is a 1:1 mapping
178  between the input and output characters. The exclamation mark has the
179  same value in both KOI8-R and Unicode.
180
181  src: 0      1      2      3      4      5      6
182  uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
183   ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL   '!'
184
185 targ:  0    1    2    3    4    5    6
186  uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
187   ch:                                '!'
188
189
190Converting FROM unicode
191  to koi8-r.
192  You must call ucnv_close to clean up the memory used by the
193  converter.
194
195  'len' returns the number of OUTPUT bytes resulting from the
196  conversion.
197 */
198
199UErrorCode convsample_02()
200{
201  printf("\n\n==============================================\n"
202         "Sample 02: C: simple Unicode -> koi8-r conversion\n");
203
204
205  // **************************** START SAMPLE *******************
206  // "cat<cat>OK"
207  UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
208                     0x0430, 0x0021, 0x0000 };
209  char target[100];
210  UErrorCode status = U_ZERO_ERROR;
211  UConverter *conv;
212  int32_t     len;
213
214  // set up the converter
215  conv = ucnv_open("koi8-r", &status);
216  assert(U_SUCCESS(status));
217
218  // convert to koi8-r
219  len = ucnv_fromUChars(conv, target, 100, source, -1, &status);
220  assert(U_SUCCESS(status));
221
222  // close the converter
223  ucnv_close(conv);
224
225  // ***************************** END SAMPLE ********************
226
227  // Print it out
228  printUChars("src", source);
229  printf("\n");
230  printBytes("targ", target, len);
231
232  return U_ZERO_ERROR;
233}
234
235
236UErrorCode convsample_03()
237{
238  printf("\n\n==============================================\n"
239         "Sample 03: C: print out all converters\n");
240
241  int32_t count;
242  int32_t i;
243
244  // **************************** START SAMPLE *******************
245  count = ucnv_countAvailable();
246  printf("Available converters: %d\n", count);
247
248  for(i=0;i<count;i++)
249  {
250    printf("%s ", ucnv_getAvailableName(i));
251  }
252
253  // ***************************** END SAMPLE ********************
254
255  printf("\n");
256
257  return U_ZERO_ERROR;
258}
259
260
261
262#define BUFFERSIZE 17 /* make it interesting :) */
263
264/*
265  Converting from a codepage to Unicode in bulk..
266  What is the best way to determine the buffer size?
267
268     The 'buffersize' is in bytes of input.
    For a given converter, dividing this by the minimum char size
    gives you the maximum number of Unicode characters that could be
271    expected for a given number of input bytes.
272     see: ucnv_getMinCharSize()
273
274     For example, a single byte codepage like 'Latin-3' has a
275    minimum char size of 1. (It takes at least 1 byte to represent
276    each Unicode char.) So the unicode buffer has the same number of
277    UChars as the input buffer has bytes.
278
279     In a strictly double byte codepage such as cp1362 (Windows
280    Korean), the minimum char size is 2. So, only half as many Unicode
281    chars as bytes are needed.
282
283     This work to calculate the buffer size is an optimization. Any
284    size of input and output buffer can be used, as long as the
285    program handles the following cases: If the input buffer is empty,
286    the source pointer will be equal to sourceLimit.  If the output
287    buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
288 */
289
290UErrorCode convsample_05()
291{
292  printf("\n\n==============================================\n"
293         "Sample 05: C: count the number of letters in a UTF-8 document\n");
294
295  FILE *f;
296  int32_t count;
297  char inBuf[BUFFERSIZE];
298  const char *source;
299  const char *sourceLimit;
300  UChar *uBuf;
301  UChar *target;
302  UChar *targetLimit;
303  UChar *p;
304  int32_t uBufSize = 0;
305  UConverter *conv;
306  UErrorCode status = U_ZERO_ERROR;
307  uint32_t letters=0, total=0;
308
309  f = fopen("data01.txt", "r");
310  if(!f)
311  {
312    fprintf(stderr, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
313    return U_FILE_ACCESS_ERROR;
314  }
315
316  // **************************** START SAMPLE *******************
317  conv = ucnv_open("utf-8", &status);
318  assert(U_SUCCESS(status));
319
320  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
321  printf("input bytes %d / min chars %d = %d UChars\n",
322         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
323  uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
324  assert(uBuf!=NULL);
325
326  // grab another buffer's worth
327  while((!feof(f)) &&
328        ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
329  {
330    // Convert bytes to unicode
331    source = inBuf;
332    sourceLimit = inBuf + count;
333
334    do
335    {
336        target = uBuf;
337        targetLimit = uBuf + uBufSize;
338
339        ucnv_toUnicode(conv, &target, targetLimit,
340                       &source, sourceLimit, NULL,
341                       feof(f)?TRUE:FALSE,         /* pass 'flush' when eof */
342                                   /* is true (when no more data will come) */
343                       &status);
344
345        if(status == U_BUFFER_OVERFLOW_ERROR)
346        {
347          // simply ran out of space - we'll reset the target ptr the next
348          // time through the loop.
349          status = U_ZERO_ERROR;
350        }
351        else
352        {
353          //  Check other errors here.
354          assert(U_SUCCESS(status));
355          // Break out of the loop (by force)
356        }
357
358        // Process the Unicode
359        // Todo: handle UTF-16/surrogates
360
361        for(p = uBuf; p<target; p++)
362        {
363          if(u_isalpha(*p))
364            letters++;
365          total++;
366        }
367    } while (source < sourceLimit); // while simply out of space
368  }
369
370  printf("%d letters out of %d total UChars.\n", letters, total);
371
372  // ***************************** END SAMPLE ********************
373  ucnv_close(conv);
374
375  printf("\n");
376
377  fclose(f);
378
379  return U_ZERO_ERROR;
380}
381#undef BUFFERSIZE
382
383#define BUFFERSIZE 1024
/* One entry of the per-code-point frequency table used by convsample_06(). */
typedef struct
{
  UChar32  codepoint;   /* the code point this entry describes */
  uint32_t frequency;   /* how many times that code point was seen */
} CharFreqInfo;
389
390UErrorCode convsample_06()
391{
392  printf("\n\n==============================================\n"
393         "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
394
395  FILE *f;
396  int32_t count;
397  char inBuf[BUFFERSIZE];
398  const char *source;
399  const char *sourceLimit;
400  int32_t uBufSize = 0;
401  UConverter *conv;
402  UErrorCode status = U_ZERO_ERROR;
403  uint32_t letters=0, total=0;
404
405  CharFreqInfo   *info;
406  UChar32   charCount = 0x10000;  /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
407  UChar32   p;
408
409  uint32_t ie = 0;
410  uint32_t gh = 0;
411  UChar32 l = 0;
412
413  f = fopen("data06.txt", "r");
414  if(!f)
415  {
416    fprintf(stderr, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
417    return U_FILE_ACCESS_ERROR;
418  }
419
420  info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
421  if(!info)
422  {
423    fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo)*charCount);
424  }
425
426  /* reset frequencies */
427  for(p=0;p<charCount;p++)
428  {
429    info[p].codepoint = p;
430    info[p].frequency = 0;
431  }
432
433  // **************************** START SAMPLE *******************
434  conv = ucnv_open("utf-8", &status);
435  assert(U_SUCCESS(status));
436
437  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
438  printf("input bytes %d / min chars %d = %d UChars\n",
439         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
440
441  // grab another buffer's worth
442  while((!feof(f)) &&
443        ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
444  {
445    // Convert bytes to unicode
446    source = inBuf;
447    sourceLimit = inBuf + count;
448
449    while(source < sourceLimit)
450    {
451      p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
452      if(U_FAILURE(status))
453      {
454        fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
455        status = U_ZERO_ERROR;
456        continue;
457      }
458      U_ASSERT(status);
459      total++;
460
461      if(u_isalpha(p))
462        letters++;
463
464      if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
465        ie++;
466
467      if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
468        gh++;
469
470      if(p>charCount)
471      {
472        fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
473        free(info);
474        fclose(f);
475        ucnv_close(conv);
476        return U_UNSUPPORTED_ERROR;
477      }
478      info[p].frequency++;
479      l = p;
480    }
481  }
482
483  fclose(f);
484  ucnv_close(conv);
485
486  printf("%d letters out of %d total UChars.\n", letters, total);
487  printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);
488
489  // now, we could sort it..
490
491  //  qsort(info, charCount, sizeof(info[0]), charfreq_compare);
492
493  for(p=0;p<charCount;p++)
494  {
495    if(info[p].frequency)
496    {
497      printf("% 5d U+%06X ", info[p].frequency, p);
498      if(p <= 0xFFFF)
499      {
500        prettyPrintUChar((UChar)p);
501      }
502      printf("\n");
503    }
504  }
505  free(info);
506  // ***************************** END SAMPLE ********************
507
508  printf("\n");
509
510  return U_ZERO_ERROR;
511}
512#undef BUFFERSIZE
513
514
515/******************************************************
516  You must call ucnv_close to clean up the memory used by the
517  converter.
518
  'len' returns the number of OUTPUT UChars resulting from the
  conversion.
521 */
522
523UErrorCode convsample_12()
524{
525  printf("\n\n==============================================\n"
526         "Sample 12: C: simple sjis -> unicode conversion\n");
527
528
529  // **************************** START SAMPLE *******************
530
531  char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
532  UChar target[100];
533  UErrorCode status = U_ZERO_ERROR;
534  UConverter *conv;
535  int32_t     len;
536
537  // set up the converter
538  conv = ucnv_open("shift_jis", &status);
539  assert(U_SUCCESS(status));
540
541  // convert to Unicode
542  // Note: we can use strlen, we know it's an 8 bit null terminated codepage
543  target[6] = 0xFDCA;
544  len = ucnv_toUChars(conv, target, 100, source, strlen(source), &status);
545  U_ASSERT(status);
546  // close the converter
547  ucnv_close(conv);
548
549  // ***************************** END SAMPLE ********************
550
551  // Print it out
552  printBytes("src", source, strlen(source) );
553  printf("\n");
554  printUChars("targ", target, len);
555
556  return U_ZERO_ERROR;
557}
558
559/******************************************************************
560   C: Convert from codepage to Unicode one at a time.
561*/
562
563UErrorCode convsample_13()
564{
565  printf("\n\n==============================================\n"
566         "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
567
568
569  const char sourceChars[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
570  //  const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
571  const char *source, *sourceLimit;
572  UChar32 target;
573  UErrorCode status = U_ZERO_ERROR;
574  UConverter *conv = NULL;
575  int32_t srcCount=0;
576  int32_t dstCount=0;
577
578  srcCount = sizeof(sourceChars);
579
580  conv = ucnv_open("Big5", &status);
581  U_ASSERT(status);
582
583  source = sourceChars;
584  sourceLimit = sourceChars + sizeof(sourceChars);
585
586  // **************************** START SAMPLE *******************
587
588
589  printBytes("src",source,sourceLimit-source);
590
591  while(source < sourceLimit)
592  {
593    puts("");
594    target = ucnv_getNextUChar (conv,
595                                &source,
596                                sourceLimit,
597                                &status);
598
599    //    printBytes("src",source,sourceLimit-source);
600    U_ASSERT(status);
601    printUChar(target);
602    dstCount++;
603  }
604
605
606  // ************************** END SAMPLE *************************
607
608  printf("src=%d bytes, dst=%d uchars\n", srcCount, dstCount);
609  ucnv_close(conv);
610
611  return U_ZERO_ERROR;
612}
613
614
615
616
617UBool convsample_20_didSubstitute(const char *source)
618{
619  UChar uchars[100];
620  char bytes[100];
621  UConverter *conv = NULL;
622  UErrorCode status = U_ZERO_ERROR;
623  uint32_t len, len2;
624  UBool  flagVal;
625
626  FromUFLAGContext * context = NULL;
627
628  printf("\n\n==============================================\n"
629         "Sample 20: C: Test for substitution using callbacks\n");
630
631  /* print out the original source */
632  printBytes("src", source);
633  printf("\n");
634
635  /* First, convert from UTF8 to unicode */
636  conv = ucnv_open("utf-8", &status);
637  U_ASSERT(status);
638
639  len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
640  U_ASSERT(status);
641
642  printUChars("uch", uchars, len);
643  printf("\n");
644
645  /* Now, close the converter */
646  ucnv_close(conv);
647
648  /* Now, convert to windows-1252 */
649  conv = ucnv_open("windows-1252", &status);
650  U_ASSERT(status);
651
652  /* Converter starts out with the SUBSTITUTE callback set. */
653
654  /* initialize our callback */
655  context = flagCB_fromU_openContext();
656
657  /* Set our special callback */
658  ucnv_setFromUCallBack(conv,
659                        flagCB_fromU,
660                        context,
661                        &(context->subCallback),
662                        &(context->subContext),
663                        &status);
664
665  U_ASSERT(status);
666
667  len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
668  U_ASSERT(status);
669
670  flagVal = context->flag;  /* it's about to go away when we close the cnv */
671
672  ucnv_close(conv);
673
674  /* print out the original source */
675  printBytes("bytes", bytes, len2);
676
677  return flagVal; /* true if callback was called */
678}
679
680UErrorCode convsample_20()
681{
682  const char *sample1 = "abc\xdf\xbf";
683  const char *sample2 = "abc_def";
684
685
686  if(convsample_20_didSubstitute(sample1))
687  {
688    printf("DID substitute.\n******\n");
689  }
690  else
691  {
692    printf("Did NOT substitute.\n*****\n");
693  }
694
695  if(convsample_20_didSubstitute(sample2))
696  {
697    printf("DID substitute.\n******\n");
698  }
699  else
700  {
701    printf("Did NOT substitute.\n*****\n");
702  }
703
704  return U_ZERO_ERROR;
705}
706
707// 21  - C, callback, with clone and debug
708
709
710
/*
 * Helper for sample 21.
 *
 * Same conversion as convsample_20_didSubstitute() (UTF-8 -> UChars ->
 * windows-1252), but the windows-1252 converter gets a chain of three
 * callbacks (debug -> flag -> debug), is then cloned with
 * ucnv_safeClone() and closed, and the conversion is done with the clone.
 * The flag is read from the context reached through the clone.
 *
 * Returns the flag value found through the clone's callback context
 * (true if the callback was called), or FALSE if no context could be
 * retrieved from the clone.
 */
UBool convsample_21_didSubstitute(const char *source)
{
  UChar uchars[100];
  char bytes[100];
  UConverter *conv = NULL, *cloneCnv = NULL;
  UErrorCode status = U_ZERO_ERROR;
  uint32_t len, len2;
  int32_t  cloneLen;
  UBool  flagVal = FALSE;
  UConverterFromUCallback junkCB;   /* receives the clone's callback; not used further */

  FromUFLAGContext *flagCtx = NULL,
                   *cloneFlagCtx = NULL;

  debugCBContext   *debugCtx1 = NULL,
                   *debugCtx2 = NULL,
                   *cloneDebugCtx = NULL;

  printf("\n\n==============================================\n"
         "Sample 21: C: Test for substitution w/ callbacks & clones \n");

  /* print out the original source */
  printBytes("src", source);
  printf("\n");

  /* First, convert from UTF8 to unicode */
  conv = ucnv_open("utf-8", &status);
  U_ASSERT(status);

  len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
  U_ASSERT(status);

  printUChars("uch", uchars, len);
  printf("\n");

  /* Now, close the converter */
  ucnv_close(conv);

  /* Now, convert to windows-1252 */
  conv = ucnv_open("windows-1252", &status);
  U_ASSERT(status);

  /* Converter starts out with the SUBSTITUTE callback set. */

  /* initialize our callback */
  /* from the 'bottom' innermost, out
   *   CNV ->  debugCtx1[debug]  ->  flagCtx[flag] -> debugCtx2[debug]  */

#if DEBUG_TMI
  printf("flagCB_fromU = %p\n", &flagCB_fromU);
  printf("debugCB_fromU = %p\n", &debugCB_fromU);
#endif

  /* Create the three contexts of the chain. */
  debugCtx1 = debugCB_openContext();
   flagCtx  = flagCB_fromU_openContext();
  debugCtx2 = debugCB_openContext();

  /* Link each context to the next callback in the chain. */
  debugCtx1->subCallback =  flagCB_fromU;  /* debug1 -> flag */
  debugCtx1->subContext  =  flagCtx;

  flagCtx->subCallback   =  debugCB_fromU; /*  flag -> debug2 */
  flagCtx->subContext    =  debugCtx2;

  debugCtx2->subCallback =  UCNV_FROM_U_CALLBACK_SUBSTITUTE; /* debug2 -> substitute */
  debugCtx2->subContext  = NULL;

  /* Set our special callback: debugCtx1 is the head of the chain.  The
   * callback and context being replaced are written into debugCtx2's
   * sub-callback fields, i.e. the end of the chain. */

  ucnv_setFromUCallBack(conv,
                        debugCB_fromU,
                        debugCtx1,
                        &(debugCtx2->subCallback),
                        &(debugCtx2->subContext),
                        &status);

  U_ASSERT(status);

#if DEBUG_TMI
  printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
         conv, debugCtx1, debugCtx1->subCallback,
         debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback);
#endif

  cloneLen = 1; /* but passing in null so it will clone */
  cloneCnv = ucnv_safeClone(conv,  NULL,  &cloneLen, &status);

  U_ASSERT(status);

#if DEBUG_TMI
  printf("Cloned converter from %p -> %p.  Closing %p.\n", conv, cloneCnv, conv);
#endif

  /* The original converter is closed here; only the clone is used below. */
  ucnv_close(conv);

#if DEBUG_TMI
  printf("%p closed.\n", conv);
#endif

  U_ASSERT(status);
  /* Now, we have to extract the context */
  cloneDebugCtx = NULL;
  cloneFlagCtx  = NULL;

  /* The clone's callback context is the head of its chain (debug1);
   * the flag context is that context's sub-context. */
  ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx);
  if(cloneDebugCtx != NULL) {
      cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext;
  }

  printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
         cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:NULL );

  /* Do the actual conversion with the clone. */
  len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status);
  U_ASSERT(status);

  if(cloneFlagCtx != NULL) {
      flagVal = cloneFlagCtx->flag;  /* it's about to go away when we close the cnv */
  } else {
      printf("** Warning, couldn't get the subcallback \n");
  }

  ucnv_close(cloneCnv);

  /* print out the converted bytes */
  printBytes("bytes", bytes, len2);

  return flagVal; /* true if callback was called */
}
838
839UErrorCode convsample_21()
840{
841  const char *sample1 = "abc\xdf\xbf";
842  const char *sample2 = "abc_def";
843
844  if(convsample_21_didSubstitute(sample1))
845  {
846    printf("DID substitute.\n******\n");
847  }
848  else
849  {
850    printf("Did NOT substitute.\n*****\n");
851  }
852
853  if(convsample_21_didSubstitute(sample2))
854  {
855    printf("DID substitute.\n******\n");
856  }
857  else
858  {
859    printf("Did NOT substitute.\n*****\n");
860  }
861
862  return U_ZERO_ERROR;
863}
864
865
866//  40-  C, cp37 -> UTF16 [data02.bin -> data40.utf16]
867
868#define BUFFERSIZE 17 /* make it interesting :) */
869
870UErrorCode convsample_40()
871{
872  printf("\n\n==============================================\n"
873    "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
874
875  FILE *f;
876  FILE *out;
877  int32_t count;
878  char inBuf[BUFFERSIZE];
879  const char *source;
880  const char *sourceLimit;
881  UChar *uBuf;
882  UChar *target;
883  UChar *targetLimit;
884  int32_t uBufSize = 0;
885  UConverter *conv = NULL;
886  UErrorCode status = U_ZERO_ERROR;
887  uint32_t inbytes=0, total=0;
888
889  f = fopen("data02.bin", "rb");
890  if(!f)
891  {
892    fprintf(stderr, "Couldn't open file 'data02.bin' (cp37 data file).\n");
893    return U_FILE_ACCESS_ERROR;
894  }
895
896  out = fopen("data40.utf16", "wb");
897  if(!out)
898  {
899    fprintf(stderr, "Couldn't create file 'data40.utf16'.\n");
900    fclose(f);
901    return U_FILE_ACCESS_ERROR;
902  }
903
904  // **************************** START SAMPLE *******************
905  conv = ucnv_openCCSID(37, UCNV_IBM, &status);
906  assert(U_SUCCESS(status));
907
908  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
909  printf("input bytes %d / min chars %d = %d UChars\n",
910         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
911  uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
912  assert(uBuf!=NULL);
913
914  // grab another buffer's worth
915  while((!feof(f)) &&
916        ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
917  {
918    inbytes += count;
919
920    // Convert bytes to unicode
921    source = inBuf;
922    sourceLimit = inBuf + count;
923
924    do
925    {
926        target = uBuf;
927        targetLimit = uBuf + uBufSize;
928
929        ucnv_toUnicode( conv, &target, targetLimit,
930                       &source, sourceLimit, NULL,
931                       feof(f)?TRUE:FALSE,         /* pass 'flush' when eof */
932                                   /* is true (when no more data will come) */
933                         &status);
934
935        if(status == U_BUFFER_OVERFLOW_ERROR)
936        {
937          // simply ran out of space - we'll reset the target ptr the next
938          // time through the loop.
939          status = U_ZERO_ERROR;
940        }
941        else
942        {
943          //  Check other errors here.
944          assert(U_SUCCESS(status));
945          // Break out of the loop (by force)
946        }
947
948        // Process the Unicode
949        // Todo: handle UTF-16/surrogates
950        assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
951               (size_t)(target-uBuf));
952        total += (target-uBuf);
953    } while (source < sourceLimit); // while simply out of space
954  }
955
956  printf("%d bytes in,  %d UChars out.\n", inbytes, total);
957
958  // ***************************** END SAMPLE ********************
959  ucnv_close(conv);
960
961  fclose(f);
962  fclose(out);
963  printf("\n");
964
965  return U_ZERO_ERROR;
966}
967#undef BUFFERSIZE
968
969
970
971//  46-  C, UTF16 -> latin2 [data40.utf16 -> data46.out]
972
973#define BUFFERSIZE 24 /* make it interesting :) */
974
975UErrorCode convsample_46()
976{
977  printf("\n\n==============================================\n"
978    "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
979
980  FILE *f;
981  FILE *out;
982  int32_t count;
983  UChar inBuf[BUFFERSIZE];
984  const UChar *source;
985  const UChar *sourceLimit;
986  char *buf;
987  char *target;
988  char *targetLimit;
989
990  int32_t bufSize = 0;
991  UConverter *conv = NULL;
992  UErrorCode status = U_ZERO_ERROR;
993  uint32_t inchars=0, total=0;
994
995  f = fopen("data40.utf16", "rb");
996  if(!f)
997  {
998    fprintf(stderr, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
999    return U_FILE_ACCESS_ERROR;
1000  }
1001
1002  out = fopen("data46.out", "wb");
1003  if(!out)
1004  {
1005    fprintf(stderr, "Couldn't create file 'data46.out'.\n");
1006    fclose(f);
1007    return U_FILE_ACCESS_ERROR;
1008  }
1009
1010  // **************************** START SAMPLE *******************
1011  conv = ucnv_open( "iso-8859-2", &status);
1012  assert(U_SUCCESS(status));
1013
1014  bufSize = (BUFFERSIZE*ucnv_getMaxCharSize(conv));
1015  printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1016         BUFFERSIZE, ucnv_getMaxCharSize(conv), bufSize);
1017  buf = (char*)malloc(bufSize * sizeof(char));
1018  assert(buf!=NULL);
1019
1020  // grab another buffer's worth
1021  while((!feof(f)) &&
1022        ((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) )
1023  {
1024    inchars += count;
1025
1026    // Convert bytes to unicode
1027    source = inBuf;
1028    sourceLimit = inBuf + count;
1029
1030    do
1031    {
1032        target = buf;
1033        targetLimit = buf + bufSize;
1034
1035        ucnv_fromUnicode( conv, &target, targetLimit,
1036                       &source, sourceLimit, NULL,
1037                       feof(f)?TRUE:FALSE,         /* pass 'flush' when eof */
1038                                   /* is true (when no more data will come) */
1039                         &status);
1040
1041        if(status == U_BUFFER_OVERFLOW_ERROR)
1042        {
1043          // simply ran out of space - we'll reset the target ptr the next
1044          // time through the loop.
1045          status = U_ZERO_ERROR;
1046        }
1047        else
1048        {
1049          //  Check other errors here.
1050          assert(U_SUCCESS(status));
1051          // Break out of the loop (by force)
1052        }
1053
1054        // Process the Unicode
1055        assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) ==
1056               (size_t)(target-buf));
1057        total += (target-buf);
1058    } while (source < sourceLimit); // while simply out of space
1059  }
1060
1061  printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total);
1062
1063  // ***************************** END SAMPLE ********************
1064  ucnv_close(conv);
1065
1066  fclose(f);
1067  fclose(out);
1068  printf("\n");
1069
1070  return U_ZERO_ERROR;
1071}
1072#undef BUFFERSIZE
1073
1074#define BUFFERSIZE 219
1075
1076
1077/* main */
1078
1079int main()
1080{
1081
1082  printf("Default Converter=%s\n", ucnv_getDefaultName() );
1083
1084  convsample_02();  // C  , u->koi8r, conv
1085  convsample_03();  // C,   iterate
1086
1087  convsample_05();  // C,  utf8->u, getNextUChar
1088  convsample_06(); // C freq counter thingy
1089
1090  convsample_12();  // C,  sjis->u, conv
1091  convsample_13();  // C,  big5->u, getNextU
1092
1093  convsample_20();  // C, callback
1094  convsample_21();  // C, callback debug
1095
1096  convsample_40();  // C,   cp37 -> UTF16 [data02.bin -> data40.utf16]
1097
1098  convsample_46();  // C,  UTF16 -> latin3 [data41.utf16 -> data46.out]
1099
1100  printf("End of converter samples.\n");
1101
1102  fflush(stdout);
1103  fflush(stderr);
1104
1105  return 0;
1106}
1107