1/********************************************************************
2 * Copyright (c) 1997-2009, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 *
6 * File UCNVSELTST.C
7 *
8 * Modification History:
9 *        Name                     Description
10 *     MOHAMED ELDAWY               Creation
11 ********************************************************************
12 */
13
14/* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
15
16#include "ucnvseltst.h"
17
18#include <stdio.h>
19
20#include "unicode/utypes.h"
21#include "unicode/ucnvsel.h"
22#include "unicode/ustring.h"
23#include "cmemory.h"
24#include "cstring.h"
25
/* Number of elements of a real array (not a pointer), converted to int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

/* Size, in bytes, of the stack buffer in which fopenOrError() assembles a file path. */
#define FILENAME_BUFFER 1024

/* Relative path that fopenOrError() appends to ctest_dataSrcDir() before the file name. */
#define TDSRCPATH  ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
31
32static void TestSelector(void);
33void addCnvSelTest(TestNode** root);  /* Declaration required to suppress compiler warnings. */
34
35void addCnvSelTest(TestNode** root)
36{
37    addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector");
38}
39
40static const char **gAvailableNames = NULL;
41static int32_t gCountAvailable = 0;
42
43static UBool
44getAvailableNames() {
45  int32_t i;
46  if (gAvailableNames != NULL) {
47    return TRUE;
48  }
49  gCountAvailable = ucnv_countAvailable();
50  if (gCountAvailable == 0) {
51    log_data_err("No converters available.\n");
52    return FALSE;
53  }
54  gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
55  if (gAvailableNames == NULL) {
56    log_err("unable to allocate memory for %ld available converter names\n",
57            (long)gCountAvailable);
58    return FALSE;
59  }
60  for (i = 0; i < gCountAvailable; ++i) {
61    gAvailableNames[i] = ucnv_getAvailableName(i);
62  }
63  return TRUE;
64}
65
66static void
67releaseAvailableNames() {
68  uprv_free((void *)gAvailableNames);
69  gAvailableNames = NULL;
70  gCountAvailable = 0;
71}
72
73static const char **
74getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
75  const char **names;
76  int32_t i;
77
78  *pCount = 0;
79  if (count <= 0) {
80    return NULL;
81  }
82  names = (const char **)uprv_malloc(count * sizeof(char *));
83  if (names == NULL) {
84    log_err("memory allocation error for %ld pointers\n", (long)count);
85    return NULL;
86  }
87  if (step == 0 && count > 0) {
88    step = 1;
89  }
90  for (i = 0; i < count; ++i) {
91    if (0 <= start && start < gCountAvailable) {
92      names[i] = gAvailableNames[start];
93      start += step;
94      ++*pCount;
95    }
96  }
97  return names;
98}
99
#if 0
/*
 * Compiled out on purpose.
 *
 * ucnvsel_open() does not support "no encodings":
 * Given 0 encodings it will open a selector for all available ones.
 *
 * This generator would deliver an empty list (NULL with a count of 0).
 */
static const char **
getNoEncodings(int32_t *pCount) {
  *pCount = 0;
  return NULL;
}
#endif
111
112static const char **
113getOneEncoding(int32_t *pCount) {
114  return getEncodings(1, 0, 1, pCount);
115}
116
117static const char **
118getFirstEvenEncodings(int32_t *pCount) {
119  return getEncodings(0, 2, 25, pCount);
120}
121
122static const char **
123getMiddleEncodings(int32_t *pCount) {
124  return getEncodings(gCountAvailable - 12, 1, 22, pCount);
125}
126
127static const char **
128getLastEncodings(int32_t *pCount) {
129  return getEncodings(gCountAvailable - 1, -1, 25, pCount);
130}
131
132static const char **
133getSomeEncodings(int32_t *pCount) {
134  /* 20 evenly distributed */
135  return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
136}
137
138static const char **
139getEveryThirdEncoding(int32_t *pCount) {
140  return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
141}
142
143static const char **
144getAllEncodings(int32_t *pCount) {
145  return getEncodings(0, 1, gCountAvailable, pCount);
146}
147
148typedef const char **GetEncodingsFn(int32_t *);
149
150static GetEncodingsFn *const getEncodingsFns[] = {
151  getOneEncoding,
152  getFirstEvenEncodings,
153  getMiddleEncodings,
154  getLastEncodings,
155  getSomeEncodings,
156  getEveryThirdEncoding,
157  getAllEncodings
158};
159
160static FILE *fopenOrError(const char *filename) {
161    int32_t needLen;
162    FILE *f;
163    char fnbuf[FILENAME_BUFFER];
164    const char* directory= ctest_dataSrcDir();
165    needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1;
166    if(needLen > FILENAME_BUFFER) {
167        log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
168                filename, needLen, FILENAME_BUFFER);
169        return NULL;
170    }
171
172    strcpy(fnbuf, directory);
173    strcat(fnbuf, TDSRCPATH);
174    strcat(fnbuf, filename);
175
176    f = fopen(fnbuf, "rb");
177
178    if(f == NULL) {
179        log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
180    }
181    return f;
182}
183
/*
 * The test text, held in one buffer as a sequence of NUL-terminated strings
 * that text_nextString() hands out one at a time.
 */
typedef struct TestText {
  char *text;       /* start of the buffer filled by text_open() */
  char *textLimit;  /* end of the text; text_open() stores a NUL here */
  char *limit;      /* terminator of the string delivered last; equals text after a reset */
  int32_t number;   /* zero-based index of the string delivered last */
} TestText;
189
190static void
191text_reset(TestText *tt) {
192  tt->limit = tt->text;
193  tt->number = 0;
194}
195
196static char *
197text_nextString(TestText *tt, int32_t *pLength) {
198  char *s = tt->limit;
199  if (s == tt->textLimit) {
200    /* we already delivered the last string */
201    return NULL;
202  } else if (s == tt->text) {
203    /* first string */
204    if ((tt->textLimit - tt->text) >= 3 &&
205        s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
206    ) {
207      s += 3;  /* skip the UTF-8 signature byte sequence (U+FEFF) */
208    }
209  } else {
210    /* skip the string terminator */
211    ++s;
212    ++tt->number;
213  }
214
215  /* find the end of this string */
216  tt->limit = uprv_strchr(s, 0);
217  *pLength = (int32_t)(tt->limit - s);
218  return s;
219}
220
221static UBool
222text_open(TestText *tt) {
223  FILE *f;
224  char *s;
225  int32_t length;
226  uprv_memset(tt, 0, sizeof(TestText));
227  f = fopenOrError("ConverterSelectorTestUTF8.txt");
228  if(!f) {
229    return FALSE;
230  }
231  fseek(f, 0, SEEK_END);
232  length = (int32_t)ftell(f);
233  fseek(f, 0, SEEK_SET);
234  tt->text = (char *)uprv_malloc(length + 1);
235  if (tt->text == NULL) {
236    fclose(f);
237    return FALSE;
238  }
239  if (length != fread(tt->text, 1, length, f)) {
240    log_err("error reading %ld bytes from test text file\n", (long)length);
241    length = 0;
242    uprv_free(tt->text);
243  }
244  fclose(f);
245  tt->textLimit = tt->text + length;
246  *tt->textLimit = 0;
247  /* replace all Unicode '#' (U+0023) with NUL */
248  for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
249  text_reset(tt);
250  return TRUE;
251}
252
253static void
254text_close(TestText *tt) {
255  uprv_free(tt->text);
256}
257
258static int32_t findIndex(const char* converterName) {
259  int32_t i;
260  for (i = 0 ; i < gCountAvailable; i++) {
261    if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
262      return i;
263    }
264  }
265  return -1;
266}
267
268static UBool *
269getResultsManually(const char** encodings, int32_t num_encodings,
270                   const char *utf8, int32_t length,
271                   const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
272  UBool* resultsManually;
273  int32_t i;
274
275  resultsManually = (UBool*) uprv_malloc(gCountAvailable);
276  uprv_memset(resultsManually, 0, gCountAvailable);
277
278  for(i = 0 ; i < num_encodings ; i++) {
279    UErrorCode status = U_ZERO_ERROR;
280    /* get unicode set for that converter */
281    USet* set;
282    UConverter* test_converter;
283    UChar32 cp;
284    int32_t encIndex, offset;
285
286    set = uset_openEmpty();
287    test_converter = ucnv_open(encodings[i], &status);
288    ucnv_getUnicodeSet(test_converter, set,
289                       whichSet, &status);
290    if (excludedCodePoints != NULL) {
291      uset_addAll(set, excludedCodePoints);
292    }
293    uset_freeze(set);
294    offset = 0;
295    cp = 0;
296
297    encIndex = findIndex(encodings[i]);
298    /*
299     * The following is almost, but not entirely, the same as
300     * resultsManually[encIndex] =
301     *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
302     * They might be different if the set contains strings,
303     * or if the utf8 string contains an illegal sequence.
304     *
305     * The UConverterSelector does not currently handle strings that can be
306     * converted, and it treats an illegal sequence as convertible
307     * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
308     */
309    resultsManually[encIndex] = TRUE;
310    while(offset<length) {
311      U8_NEXT(utf8, offset, length, cp);
312      if (cp >= 0 && !uset_contains(set, cp)) {
313        resultsManually[encIndex] = FALSE;
314        break;
315      }
316    }
317    uset_close(set);
318    ucnv_close(test_converter);
319  }
320  return resultsManually;
321}
322
323/* closes res but does not free resultsManually */
324static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
325  UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
326  const char* name;
327  UErrorCode status = U_ZERO_ERROR;
328  int32_t i;
329
330  /* fill the bool for the selector results! */
331  uprv_memset(resultsFromSystem, 0, gCountAvailable);
332  while ((name = uenum_next(res,NULL, &status)) != NULL) {
333    resultsFromSystem[findIndex(name)] = TRUE;
334  }
335  for(i = 0 ; i < gCountAvailable; i++) {
336    if(resultsManually[i] != resultsFromSystem[i]) {
337      log_err("failure in converter selector\n"
338              "converter %s had conflicting results -- manual: %d, system %d\n",
339              gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
340    }
341  }
342  uprv_free(resultsFromSystem);
343  uenum_close(res);
344}
345
346static UConverterSelector *
347serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
348  char *new_buffer;
349  int32_t ser_len, ser_len2;
350  /* preflight */
351  ser_len = ucnvsel_serialize(sel, NULL, 0, status);
352  if (*status != U_BUFFER_OVERFLOW_ERROR) {
353    log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
354    return sel;
355  }
356  new_buffer = (char *)uprv_malloc(ser_len);
357  *status = U_ZERO_ERROR;
358  ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
359  if (U_FAILURE(*status) || ser_len != ser_len2) {
360    log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
361    uprv_free(new_buffer);
362    return sel;
363  }
364  ucnvsel_close(sel);
365  uprv_free(*buffer);
366  *buffer = new_buffer;
367  sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
368  if (U_FAILURE(*status)) {
369    log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
370    return NULL;
371  }
372  return sel;
373}
374
375static void TestSelector()
376{
377  TestText text;
378  USet* excluded_sets[3] = { NULL };
379  int32_t i, testCaseIdx;
380
381  if (!getAvailableNames()) {
382    return;
383  }
384  if (!text_open(&text)) {
385    releaseAvailableNames();;
386  }
387
388  excluded_sets[0] = uset_openEmpty();
389  for(i = 1 ; i < 3 ; i++) {
390    excluded_sets[i] = uset_open(i*30, i*30+500);
391  }
392
393  for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++)
394  {
395    int32_t excluded_set_id;
396    int32_t num_encodings;
397    const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
398    if (QUICK && num_encodings > 25) {
399      uprv_free((void *)encodings);
400      continue;
401    }
402
403    /*
404     * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
405     *
406     * This loop was replaced by the following statement because
407     * the loop made the test run longer without adding to the code coverage.
408     * The handling of the exclusion set is independent of the
409     * set of encodings, so there is no need to test every combination.
410     */
411    excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets);
412    {
413      UConverterSelector *sel_rt, *sel_fb;
414      char *buffer_fb = NULL;
415      UErrorCode status = U_ZERO_ERROR;
416      sel_rt = ucnvsel_open(encodings, num_encodings,
417                            excluded_sets[excluded_set_id],
418                            UCNV_ROUNDTRIP_SET, &status);
419      if (num_encodings == gCountAvailable) {
420        /* test the special "all converters" parameter values */
421        sel_fb = ucnvsel_open(NULL, 0,
422                              excluded_sets[excluded_set_id],
423                              UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
424      } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
425        /* test that a NULL set gives the same results as an empty set */
426        sel_fb = ucnvsel_open(encodings, num_encodings,
427                              NULL,
428                              UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
429      } else {
430        sel_fb = ucnvsel_open(encodings, num_encodings,
431                              excluded_sets[excluded_set_id],
432                              UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
433      }
434      if (U_FAILURE(status)) {
435        log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
436        ucnvsel_close(sel_rt);
437        uprv_free((void *)encodings);
438        continue;
439      }
440
441      text_reset(&text);
442      for (;;) {
443        UBool *manual_rt, *manual_fb;
444        static UChar utf16[10000];
445        char *s;
446        int32_t length8, length16;
447
448        s = text_nextString(&text, &length8);
449        if (s == NULL || (QUICK && text.number > 3)) {
450          break;
451        }
452
453        manual_rt = getResultsManually(encodings, num_encodings,
454                                       s, length8,
455                                       excluded_sets[excluded_set_id],
456                                       UCNV_ROUNDTRIP_SET);
457        manual_fb = getResultsManually(encodings, num_encodings,
458                                       s, length8,
459                                       excluded_sets[excluded_set_id],
460                                       UCNV_ROUNDTRIP_AND_FALLBACK_SET);
461        /* UTF-8 with length */
462        status = U_ZERO_ERROR;
463        verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
464        verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
465        /* UTF-8 NUL-terminated */
466        verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
467        verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
468
469        u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status);
470        if (U_FAILURE(status)) {
471          log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
472                  (long)text.number, u_errorName(status));
473        } else {
474          if (text.number == 0) {
475            sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
476          }
477          if (U_SUCCESS(status)) {
478            /* UTF-16 with length */
479            verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
480            verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
481            /* UTF-16 NUL-terminated */
482            verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
483            verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
484          }
485        }
486
487        uprv_free(manual_rt);
488        uprv_free(manual_fb);
489      }
490      ucnvsel_close(sel_rt);
491      ucnvsel_close(sel_fb);
492      uprv_free(buffer_fb);
493    }
494    uprv_free((void *)encodings);
495  }
496
497  releaseAvailableNames();
498  text_close(&text);
499  for(i = 0 ; i < 3 ; i++) {
500    uset_close(excluded_sets[i]);
501  }
502}
503