1/********************************************************************
2 * Copyright (c) 1997-2011, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 *
6 * File UCNVSELTST.C
7 *
8 * Modification History:
9 *        Name                     Description
10 *     MOHAMED ELDAWY               Creation
11 ********************************************************************
12 */
13
14/* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
15
16#include "ucnvseltst.h"
17
18#include <stdio.h>
19
20#include "unicode/utypes.h"
21#include "unicode/ucnvsel.h"
22#include "unicode/ustring.h"
23#include "cmemory.h"
24#include "cstring.h"
25#include "propsvec.h"
26
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves as a
 * single primary expression in any context (e.g. as a sizeof operand).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Capacity, in bytes, of the buffer in which fopenOrError() builds a file path. */
#define FILENAME_BUFFER 1024

/* Path fragment that fopenOrError() appends to ctest_dataSrcDir() in front of the file name. */
#define TDSRCPATH  ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
32
33static void TestSelector(void);
34static void TestUPropsVector(void);
35void addCnvSelTest(TestNode** root);  /* Declaration required to suppress compiler warnings. */
36
37void addCnvSelTest(TestNode** root)
38{
39    addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector");
40    addTest(root, &TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector");
41}
42
43static const char **gAvailableNames = NULL;
44static int32_t gCountAvailable = 0;
45
46static UBool
47getAvailableNames() {
48  int32_t i;
49  if (gAvailableNames != NULL) {
50    return TRUE;
51  }
52  gCountAvailable = ucnv_countAvailable();
53  if (gCountAvailable == 0) {
54    log_data_err("No converters available.\n");
55    return FALSE;
56  }
57  gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
58  if (gAvailableNames == NULL) {
59    log_err("unable to allocate memory for %ld available converter names\n",
60            (long)gCountAvailable);
61    return FALSE;
62  }
63  for (i = 0; i < gCountAvailable; ++i) {
64    gAvailableNames[i] = ucnv_getAvailableName(i);
65  }
66  return TRUE;
67}
68
69static void
70releaseAvailableNames() {
71  uprv_free((void *)gAvailableNames);
72  gAvailableNames = NULL;
73  gCountAvailable = 0;
74}
75
76static const char **
77getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
78  const char **names;
79  int32_t i;
80
81  *pCount = 0;
82  if (count <= 0) {
83    return NULL;
84  }
85  names = (const char **)uprv_malloc(count * sizeof(char *));
86  if (names == NULL) {
87    log_err("memory allocation error for %ld pointers\n", (long)count);
88    return NULL;
89  }
90  if (step == 0 && count > 0) {
91    step = 1;
92  }
93  for (i = 0; i < count; ++i) {
94    if (0 <= start && start < gCountAvailable) {
95      names[i] = gAvailableNames[start];
96      start += step;
97      ++*pCount;
98    }
99  }
100  return names;
101}
102
#if 0
/*
 * Disabled code.
 * ucnvsel_open() does not support "no encodings":
 * Given 0 encodings it will open a selector for all available ones.
 *
 * This function would report zero encodings and return no array.
 */
static const char **
getNoEncodings(int32_t *pCount) {
  *pCount = 0;
  return NULL;
}
#endif
114
/* Selects a single converter name: the one at index 1 of the available names. */
static const char **
getOneEncoding(int32_t *pCount) {
  const int32_t first = 1;
  const int32_t stride = 0;  /* getEncodings() treats a step of 0 as 1 */
  const int32_t wanted = 1;

  return getEncodings(first, stride, wanted, pCount);
}
119
/* Selects up to 25 converter names at the even indexes 0, 2, 4, ... */
static const char **
getFirstEvenEncodings(int32_t *pCount) {
  const int32_t first = 0;
  const int32_t stride = 2;
  const int32_t wanted = 25;

  return getEncodings(first, stride, wanted, pCount);
}
124
125static const char **
126getMiddleEncodings(int32_t *pCount) {
127  return getEncodings(gCountAvailable - 12, 1, 22, pCount);
128}
129
130static const char **
131getLastEncodings(int32_t *pCount) {
132  return getEncodings(gCountAvailable - 1, -1, 25, pCount);
133}
134
135static const char **
136getSomeEncodings(int32_t *pCount) {
137  /* 20 evenly distributed */
138  return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
139}
140
141static const char **
142getEveryThirdEncoding(int32_t *pCount) {
143  return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
144}
145
146static const char **
147getAllEncodings(int32_t *pCount) {
148  return getEncodings(0, 1, gCountAvailable, pCount);
149}
150
151typedef const char **GetEncodingsFn(int32_t *);
152
153static GetEncodingsFn *const getEncodingsFns[] = {
154  getOneEncoding,
155  getFirstEvenEncodings,
156  getMiddleEncodings,
157  getLastEncodings,
158  getSomeEncodings,
159  getEveryThirdEncoding,
160  getAllEncodings
161};
162
163static FILE *fopenOrError(const char *filename) {
164    int32_t needLen;
165    FILE *f;
166    char fnbuf[FILENAME_BUFFER];
167    const char* directory= ctest_dataSrcDir();
168    needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1;
169    if(needLen > FILENAME_BUFFER) {
170        log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
171                filename, needLen, FILENAME_BUFFER);
172        return NULL;
173    }
174
175    strcpy(fnbuf, directory);
176    strcat(fnbuf, TDSRCPATH);
177    strcat(fnbuf, filename);
178
179    f = fopen(fnbuf, "rb");
180
181    if(f == NULL) {
182        log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
183    }
184    return f;
185}
186
/* The test text, held in memory as a sequence of NUL-terminated strings. */
typedef struct TestText {
  char *text;       /* start of the text buffer */
  char *textLimit;  /* position of the final terminating NUL of the buffer */
  char *limit;      /* end of the string most recently returned by text_nextString() */
  int32_t number;   /* 0-based number of the string most recently returned */
} TestText;
192
193static void
194text_reset(TestText *tt) {
195  tt->limit = tt->text;
196  tt->number = 0;
197}
198
199static char *
200text_nextString(TestText *tt, int32_t *pLength) {
201  char *s = tt->limit;
202  if (s == tt->textLimit) {
203    /* we already delivered the last string */
204    return NULL;
205  } else if (s == tt->text) {
206    /* first string */
207    if ((tt->textLimit - tt->text) >= 3 &&
208        s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
209    ) {
210      s += 3;  /* skip the UTF-8 signature byte sequence (U+FEFF) */
211    }
212  } else {
213    /* skip the string terminator */
214    ++s;
215    ++tt->number;
216  }
217
218  /* find the end of this string */
219  tt->limit = uprv_strchr(s, 0);
220  *pLength = (int32_t)(tt->limit - s);
221  return s;
222}
223
224static UBool
225text_open(TestText *tt) {
226  FILE *f;
227  char *s;
228  int32_t length;
229  uprv_memset(tt, 0, sizeof(TestText));
230  f = fopenOrError("ConverterSelectorTestUTF8.txt");
231  if(!f) {
232    return FALSE;
233  }
234  fseek(f, 0, SEEK_END);
235  length = (int32_t)ftell(f);
236  fseek(f, 0, SEEK_SET);
237  tt->text = (char *)uprv_malloc(length + 1);
238  if (tt->text == NULL) {
239    fclose(f);
240    return FALSE;
241  }
242  if (length != fread(tt->text, 1, length, f)) {
243    log_err("error reading %ld bytes from test text file\n", (long)length);
244    length = 0;
245    uprv_free(tt->text);
246  }
247  fclose(f);
248  tt->textLimit = tt->text + length;
249  *tt->textLimit = 0;
250  /* replace all Unicode '#' (U+0023) with NUL */
251  for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
252  text_reset(tt);
253  return TRUE;
254}
255
256static void
257text_close(TestText *tt) {
258  uprv_free(tt->text);
259}
260
261static int32_t findIndex(const char* converterName) {
262  int32_t i;
263  for (i = 0 ; i < gCountAvailable; i++) {
264    if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
265      return i;
266    }
267  }
268  return -1;
269}
270
271static UBool *
272getResultsManually(const char** encodings, int32_t num_encodings,
273                   const char *utf8, int32_t length,
274                   const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
275  UBool* resultsManually;
276  int32_t i;
277
278  resultsManually = (UBool*) uprv_malloc(gCountAvailable);
279  uprv_memset(resultsManually, 0, gCountAvailable);
280
281  for(i = 0 ; i < num_encodings ; i++) {
282    UErrorCode status = U_ZERO_ERROR;
283    /* get unicode set for that converter */
284    USet* set;
285    UConverter* test_converter;
286    UChar32 cp;
287    int32_t encIndex, offset;
288
289    set = uset_openEmpty();
290    test_converter = ucnv_open(encodings[i], &status);
291    ucnv_getUnicodeSet(test_converter, set,
292                       whichSet, &status);
293    if (excludedCodePoints != NULL) {
294      uset_addAll(set, excludedCodePoints);
295    }
296    uset_freeze(set);
297    offset = 0;
298    cp = 0;
299
300    encIndex = findIndex(encodings[i]);
301    /*
302     * The following is almost, but not entirely, the same as
303     * resultsManually[encIndex] =
304     *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
305     * They might be different if the set contains strings,
306     * or if the utf8 string contains an illegal sequence.
307     *
308     * The UConverterSelector does not currently handle strings that can be
309     * converted, and it treats an illegal sequence as convertible
310     * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
311     */
312    resultsManually[encIndex] = TRUE;
313    while(offset<length) {
314      U8_NEXT(utf8, offset, length, cp);
315      if (cp >= 0 && !uset_contains(set, cp)) {
316        resultsManually[encIndex] = FALSE;
317        break;
318      }
319    }
320    uset_close(set);
321    ucnv_close(test_converter);
322  }
323  return resultsManually;
324}
325
326/* closes res but does not free resultsManually */
327static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
328  UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
329  const char* name;
330  UErrorCode status = U_ZERO_ERROR;
331  int32_t i;
332
333  /* fill the bool for the selector results! */
334  uprv_memset(resultsFromSystem, 0, gCountAvailable);
335  while ((name = uenum_next(res,NULL, &status)) != NULL) {
336    resultsFromSystem[findIndex(name)] = TRUE;
337  }
338  for(i = 0 ; i < gCountAvailable; i++) {
339    if(resultsManually[i] != resultsFromSystem[i]) {
340      log_err("failure in converter selector\n"
341              "converter %s had conflicting results -- manual: %d, system %d\n",
342              gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
343    }
344  }
345  uprv_free(resultsFromSystem);
346  uenum_close(res);
347}
348
349static UConverterSelector *
350serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
351  char *new_buffer;
352  int32_t ser_len, ser_len2;
353  /* preflight */
354  ser_len = ucnvsel_serialize(sel, NULL, 0, status);
355  if (*status != U_BUFFER_OVERFLOW_ERROR) {
356    log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
357    return sel;
358  }
359  new_buffer = (char *)uprv_malloc(ser_len);
360  *status = U_ZERO_ERROR;
361  ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
362  if (U_FAILURE(*status) || ser_len != ser_len2) {
363    log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
364    uprv_free(new_buffer);
365    return sel;
366  }
367  ucnvsel_close(sel);
368  uprv_free(*buffer);
369  *buffer = new_buffer;
370  sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
371  if (U_FAILURE(*status)) {
372    log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
373    return NULL;
374  }
375  return sel;
376}
377
378static void TestSelector()
379{
380  TestText text;
381  USet* excluded_sets[3] = { NULL };
382  int32_t i, testCaseIdx;
383
384  if (!getAvailableNames()) {
385    return;
386  }
387  if (!text_open(&text)) {
388    releaseAvailableNames();;
389  }
390
391  excluded_sets[0] = uset_openEmpty();
392  for(i = 1 ; i < 3 ; i++) {
393    excluded_sets[i] = uset_open(i*30, i*30+500);
394  }
395
396  for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++)
397  {
398    int32_t excluded_set_id;
399    int32_t num_encodings;
400    const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
401    if (getTestOption(QUICK_OPTION) && num_encodings > 25) {
402      uprv_free((void *)encodings);
403      continue;
404    }
405
406    /*
407     * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
408     *
409     * This loop was replaced by the following statement because
410     * the loop made the test run longer without adding to the code coverage.
411     * The handling of the exclusion set is independent of the
412     * set of encodings, so there is no need to test every combination.
413     */
414    excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets);
415    {
416      UConverterSelector *sel_rt, *sel_fb;
417      char *buffer_fb = NULL;
418      UErrorCode status = U_ZERO_ERROR;
419      sel_rt = ucnvsel_open(encodings, num_encodings,
420                            excluded_sets[excluded_set_id],
421                            UCNV_ROUNDTRIP_SET, &status);
422      if (num_encodings == gCountAvailable) {
423        /* test the special "all converters" parameter values */
424        sel_fb = ucnvsel_open(NULL, 0,
425                              excluded_sets[excluded_set_id],
426                              UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
427      } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
428        /* test that a NULL set gives the same results as an empty set */
429        sel_fb = ucnvsel_open(encodings, num_encodings,
430                              NULL,
431                              UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
432      } else {
433        sel_fb = ucnvsel_open(encodings, num_encodings,
434                              excluded_sets[excluded_set_id],
435                              UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
436      }
437      if (U_FAILURE(status)) {
438        log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
439        ucnvsel_close(sel_rt);
440        uprv_free((void *)encodings);
441        continue;
442      }
443
444      text_reset(&text);
445      for (;;) {
446        UBool *manual_rt, *manual_fb;
447        static UChar utf16[10000];
448        char *s;
449        int32_t length8, length16;
450
451        s = text_nextString(&text, &length8);
452        if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) {
453          break;
454        }
455
456        manual_rt = getResultsManually(encodings, num_encodings,
457                                       s, length8,
458                                       excluded_sets[excluded_set_id],
459                                       UCNV_ROUNDTRIP_SET);
460        manual_fb = getResultsManually(encodings, num_encodings,
461                                       s, length8,
462                                       excluded_sets[excluded_set_id],
463                                       UCNV_ROUNDTRIP_AND_FALLBACK_SET);
464        /* UTF-8 with length */
465        status = U_ZERO_ERROR;
466        verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
467        verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
468        /* UTF-8 NUL-terminated */
469        verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
470        verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
471
472        u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status);
473        if (U_FAILURE(status)) {
474          log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
475                  (long)text.number, u_errorName(status));
476        } else {
477          if (text.number == 0) {
478            sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
479          }
480          if (U_SUCCESS(status)) {
481            /* UTF-16 with length */
482            verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
483            verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
484            /* UTF-16 NUL-terminated */
485            verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
486            verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
487          }
488        }
489
490        uprv_free(manual_rt);
491        uprv_free(manual_fb);
492      }
493      ucnvsel_close(sel_rt);
494      ucnvsel_close(sel_fb);
495      uprv_free(buffer_fb);
496    }
497    uprv_free((void *)encodings);
498  }
499
500  releaseAvailableNames();
501  text_close(&text);
502  for(i = 0 ; i < 3 ; i++) {
503    uset_close(excluded_sets[i]);
504  }
505}
506
507/* Improve code coverage of UPropsVectors */
508static void TestUPropsVector() {
509    UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR;
510    UPropsVectors *pv = upvec_open(100, &errorCode);
511    if (pv != NULL) {
512        log_err("Should have returned NULL if UErrorCode is an error.");
513        return;
514    }
515    errorCode = U_ZERO_ERROR;
516    pv = upvec_open(-1, &errorCode);
517    if (pv != NULL || U_SUCCESS(errorCode)) {
518        log_err("Should have returned NULL if column is less than 0.\n");
519        return;
520    }
521    errorCode = U_ZERO_ERROR;
522    pv = upvec_open(100, &errorCode);
523    if (pv == NULL || U_FAILURE(errorCode)) {
524        log_err("Unable to open UPropsVectors.\n");
525        return;
526    }
527
528    if (upvec_getValue(pv, 0, 1) != 0) {
529        log_err("upvec_getValue should return 0.\n");
530    }
531    if (upvec_getRow(pv, 0, NULL, NULL) == NULL) {
532        log_err("upvec_getRow should not return NULL.\n");
533    }
534    if (upvec_getArray(pv, NULL, NULL) != NULL) {
535        log_err("upvec_getArray should return NULL.\n");
536    }
537
538    upvec_close(pv);
539}
540