1/********************************************************************
2 * Copyright (c) 1997-2014, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 *
6 * File UCNVSELTST.C
7 *
8 * Modification History:
9 *        Name                     Description
10 *     MOHAMED ELDAWY               Creation
11 ********************************************************************
12 */
13
14/* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
15
16#include "ucnvseltst.h"
17
18#include <stdio.h>
19
20#include "unicode/utypes.h"
21#include "unicode/ucnvsel.h"
22#include "unicode/ustring.h"
23#include "cmemory.h"
24#include "cstring.h"
25#include "propsvec.h"
26
/* Capacity, in bytes, of the path buffer used when opening test data files. */
#define FILENAME_BUFFER 1024

/* Path fragment appended to the data source directory: "../test/testdata/" using the platform separator. */
#define TDSRCPATH \
    ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
30
31static void TestSelector(void);
32static void TestUPropsVector(void);
33void addCnvSelTest(TestNode** root);  /* Declaration required to suppress compiler warnings. */
34
35void addCnvSelTest(TestNode** root)
36{
37    addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector");
38    addTest(root, &TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector");
39}
40
41static const char **gAvailableNames = NULL;
42static int32_t gCountAvailable = 0;
43
44static UBool
45getAvailableNames() {
46  int32_t i;
47  if (gAvailableNames != NULL) {
48    return TRUE;
49  }
50  gCountAvailable = ucnv_countAvailable();
51  if (gCountAvailable == 0) {
52    log_data_err("No converters available.\n");
53    return FALSE;
54  }
55  gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
56  if (gAvailableNames == NULL) {
57    log_err("unable to allocate memory for %ld available converter names\n",
58            (long)gCountAvailable);
59    return FALSE;
60  }
61  for (i = 0; i < gCountAvailable; ++i) {
62    gAvailableNames[i] = ucnv_getAvailableName(i);
63  }
64  return TRUE;
65}
66
67static void
68releaseAvailableNames() {
69  uprv_free((void *)gAvailableNames);
70  gAvailableNames = NULL;
71  gCountAvailable = 0;
72}
73
74static const char **
75getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
76  const char **names;
77  int32_t i;
78
79  *pCount = 0;
80  if (count <= 0) {
81    return NULL;
82  }
83  names = (const char **)uprv_malloc(count * sizeof(char *));
84  if (names == NULL) {
85    log_err("memory allocation error for %ld pointers\n", (long)count);
86    return NULL;
87  }
88  if (step == 0 && count > 0) {
89    step = 1;
90  }
91  for (i = 0; i < count; ++i) {
92    if (0 <= start && start < gCountAvailable) {
93      names[i] = gAvailableNames[start];
94      start += step;
95      ++*pCount;
96    }
97  }
98  return names;
99}
100
#if 0
/*
 * Disabled on purpose: ucnvsel_open() does not support "no encodings".
 * Given 0 encodings it will open a selector for all available ones,
 * so an empty list cannot be used as a test case of its own.
 */
static const char **
getNoEncodings(int32_t *pCount) {
  *pCount = 0;
  return NULL;
}
#endif
112
113static const char **
114getOneEncoding(int32_t *pCount) {
115  return getEncodings(1, 0, 1, pCount);
116}
117
118static const char **
119getFirstEvenEncodings(int32_t *pCount) {
120  return getEncodings(0, 2, 25, pCount);
121}
122
123static const char **
124getMiddleEncodings(int32_t *pCount) {
125  return getEncodings(gCountAvailable - 12, 1, 22, pCount);
126}
127
128static const char **
129getLastEncodings(int32_t *pCount) {
130  return getEncodings(gCountAvailable - 1, -1, 25, pCount);
131}
132
133static const char **
134getSomeEncodings(int32_t *pCount) {
135  /* 20 evenly distributed */
136  return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
137}
138
139static const char **
140getEveryThirdEncoding(int32_t *pCount) {
141  return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
142}
143
144static const char **
145getAllEncodings(int32_t *pCount) {
146  return getEncodings(0, 1, gCountAvailable, pCount);
147}
148
149typedef const char **GetEncodingsFn(int32_t *);
150
151static GetEncodingsFn *const getEncodingsFns[] = {
152  getOneEncoding,
153  getFirstEvenEncodings,
154  getMiddleEncodings,
155  getLastEncodings,
156  getSomeEncodings,
157  getEveryThirdEncoding,
158  getAllEncodings
159};
160
161static FILE *fopenOrError(const char *filename) {
162    int32_t needLen;
163    FILE *f;
164    char fnbuf[FILENAME_BUFFER];
165    const char* directory= ctest_dataSrcDir();
166    needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1;
167    if(needLen > FILENAME_BUFFER) {
168        log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
169                filename, needLen, FILENAME_BUFFER);
170        return NULL;
171    }
172
173    strcpy(fnbuf, directory);
174    strcat(fnbuf, TDSRCPATH);
175    strcat(fnbuf, filename);
176
177    f = fopen(fnbuf, "rb");
178
179    if(f == NULL) {
180        log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
181    }
182    return f;
183}
184
/*
 * The contents of the test text file, cut into NUL-terminated strings,
 * together with the position of the string delivered most recently.
 */
typedef struct TestText {
  char *text;       /* start of the buffer holding the file contents */
  char *textLimit;  /* end of the contents (where the final NUL is stored) */
  char *limit;      /* end of the last delivered string; equals text before the first one */
  int32_t number;   /* zero-based number of the last delivered string */
} TestText;
190
191static void
192text_reset(TestText *tt) {
193  tt->limit = tt->text;
194  tt->number = 0;
195}
196
197static char *
198text_nextString(TestText *tt, int32_t *pLength) {
199  char *s = tt->limit;
200  if (s == tt->textLimit) {
201    /* we already delivered the last string */
202    return NULL;
203  } else if (s == tt->text) {
204    /* first string */
205    if ((tt->textLimit - tt->text) >= 3 &&
206        s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
207    ) {
208      s += 3;  /* skip the UTF-8 signature byte sequence (U+FEFF) */
209    }
210  } else {
211    /* skip the string terminator */
212    ++s;
213    ++tt->number;
214  }
215
216  /* find the end of this string */
217  tt->limit = uprv_strchr(s, 0);
218  *pLength = (int32_t)(tt->limit - s);
219  return s;
220}
221
222static UBool
223text_open(TestText *tt) {
224  FILE *f;
225  char *s;
226  int32_t length;
227  uprv_memset(tt, 0, sizeof(TestText));
228  f = fopenOrError("ConverterSelectorTestUTF8.txt");
229  if(!f) {
230    return FALSE;
231  }
232  fseek(f, 0, SEEK_END);
233  length = (int32_t)ftell(f);
234  fseek(f, 0, SEEK_SET);
235  tt->text = (char *)uprv_malloc(length + 1);
236  if (tt->text == NULL) {
237    fclose(f);
238    return FALSE;
239  }
240  if (length != fread(tt->text, 1, length, f)) {
241    log_err("error reading %ld bytes from test text file\n", (long)length);
242    length = 0;
243    uprv_free(tt->text);
244  }
245  fclose(f);
246  tt->textLimit = tt->text + length;
247  *tt->textLimit = 0;
248  /* replace all Unicode '#' (U+0023) with NUL */
249  for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
250  text_reset(tt);
251  return TRUE;
252}
253
254static void
255text_close(TestText *tt) {
256  uprv_free(tt->text);
257}
258
259static int32_t findIndex(const char* converterName) {
260  int32_t i;
261  for (i = 0 ; i < gCountAvailable; i++) {
262    if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
263      return i;
264    }
265  }
266  return -1;
267}
268
269static UBool *
270getResultsManually(const char** encodings, int32_t num_encodings,
271                   const char *utf8, int32_t length,
272                   const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
273  UBool* resultsManually;
274  int32_t i;
275
276  resultsManually = (UBool*) uprv_malloc(gCountAvailable);
277  uprv_memset(resultsManually, 0, gCountAvailable);
278
279  for(i = 0 ; i < num_encodings ; i++) {
280    UErrorCode status = U_ZERO_ERROR;
281    /* get unicode set for that converter */
282    USet* set;
283    UConverter* test_converter;
284    UChar32 cp;
285    int32_t encIndex, offset;
286
287    set = uset_openEmpty();
288    test_converter = ucnv_open(encodings[i], &status);
289    ucnv_getUnicodeSet(test_converter, set,
290                       whichSet, &status);
291    if (excludedCodePoints != NULL) {
292      uset_addAll(set, excludedCodePoints);
293    }
294    uset_freeze(set);
295    offset = 0;
296    cp = 0;
297
298    encIndex = findIndex(encodings[i]);
299    /*
300     * The following is almost, but not entirely, the same as
301     * resultsManually[encIndex] =
302     *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
303     * They might be different if the set contains strings,
304     * or if the utf8 string contains an illegal sequence.
305     *
306     * The UConverterSelector does not currently handle strings that can be
307     * converted, and it treats an illegal sequence as convertible
308     * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
309     */
310    resultsManually[encIndex] = TRUE;
311    while(offset<length) {
312      U8_NEXT(utf8, offset, length, cp);
313      if (cp >= 0 && !uset_contains(set, cp)) {
314        resultsManually[encIndex] = FALSE;
315        break;
316      }
317    }
318    uset_close(set);
319    ucnv_close(test_converter);
320  }
321  return resultsManually;
322}
323
324/* closes res but does not free resultsManually */
325static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
326  UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
327  const char* name;
328  UErrorCode status = U_ZERO_ERROR;
329  int32_t i;
330
331  /* fill the bool for the selector results! */
332  uprv_memset(resultsFromSystem, 0, gCountAvailable);
333  while ((name = uenum_next(res,NULL, &status)) != NULL) {
334    resultsFromSystem[findIndex(name)] = TRUE;
335  }
336  for(i = 0 ; i < gCountAvailable; i++) {
337    if(resultsManually[i] != resultsFromSystem[i]) {
338      log_err("failure in converter selector\n"
339              "converter %s had conflicting results -- manual: %d, system %d\n",
340              gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
341    }
342  }
343  uprv_free(resultsFromSystem);
344  uenum_close(res);
345}
346
347static UConverterSelector *
348serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
349  char *new_buffer;
350  int32_t ser_len, ser_len2;
351  /* preflight */
352  ser_len = ucnvsel_serialize(sel, NULL, 0, status);
353  if (*status != U_BUFFER_OVERFLOW_ERROR) {
354    log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
355    return sel;
356  }
357  new_buffer = (char *)uprv_malloc(ser_len);
358  *status = U_ZERO_ERROR;
359  ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
360  if (U_FAILURE(*status) || ser_len != ser_len2) {
361    log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
362    uprv_free(new_buffer);
363    return sel;
364  }
365  ucnvsel_close(sel);
366  uprv_free(*buffer);
367  *buffer = new_buffer;
368  sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
369  if (U_FAILURE(*status)) {
370    log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
371    return NULL;
372  }
373  return sel;
374}
375
376static void TestSelector()
377{
378  TestText text;
379  USet* excluded_sets[3] = { NULL };
380  int32_t i, testCaseIdx;
381
382  if (!getAvailableNames()) {
383    return;
384  }
385  if (!text_open(&text)) {
386    releaseAvailableNames();;
387  }
388
389  excluded_sets[0] = uset_openEmpty();
390  for(i = 1 ; i < 3 ; i++) {
391    excluded_sets[i] = uset_open(i*30, i*30+500);
392  }
393
394  for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx++)
395  {
396    int32_t excluded_set_id;
397    int32_t num_encodings;
398    const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
399    if (getTestOption(QUICK_OPTION) && num_encodings > 25) {
400      uprv_free((void *)encodings);
401      continue;
402    }
403
404    /*
405     * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
406     *
407     * This loop was replaced by the following statement because
408     * the loop made the test run longer without adding to the code coverage.
409     * The handling of the exclusion set is independent of the
410     * set of encodings, so there is no need to test every combination.
411     */
412    excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets);
413    {
414      UConverterSelector *sel_rt, *sel_fb;
415      char *buffer_fb = NULL;
416      UErrorCode status = U_ZERO_ERROR;
417      sel_rt = ucnvsel_open(encodings, num_encodings,
418                            excluded_sets[excluded_set_id],
419                            UCNV_ROUNDTRIP_SET, &status);
420      if (num_encodings == gCountAvailable) {
421        /* test the special "all converters" parameter values */
422        sel_fb = ucnvsel_open(NULL, 0,
423                              excluded_sets[excluded_set_id],
424                              UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
425      } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
426        /* test that a NULL set gives the same results as an empty set */
427        sel_fb = ucnvsel_open(encodings, num_encodings,
428                              NULL,
429                              UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
430      } else {
431        sel_fb = ucnvsel_open(encodings, num_encodings,
432                              excluded_sets[excluded_set_id],
433                              UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
434      }
435      if (U_FAILURE(status)) {
436        log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
437        ucnvsel_close(sel_rt);
438        uprv_free((void *)encodings);
439        continue;
440      }
441
442      text_reset(&text);
443      for (;;) {
444        UBool *manual_rt, *manual_fb;
445        static UChar utf16[10000];
446        char *s;
447        int32_t length8, length16;
448
449        s = text_nextString(&text, &length8);
450        if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) {
451          break;
452        }
453
454        manual_rt = getResultsManually(encodings, num_encodings,
455                                       s, length8,
456                                       excluded_sets[excluded_set_id],
457                                       UCNV_ROUNDTRIP_SET);
458        manual_fb = getResultsManually(encodings, num_encodings,
459                                       s, length8,
460                                       excluded_sets[excluded_set_id],
461                                       UCNV_ROUNDTRIP_AND_FALLBACK_SET);
462        /* UTF-8 with length */
463        status = U_ZERO_ERROR;
464        verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
465        verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
466        /* UTF-8 NUL-terminated */
467        verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
468        verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
469
470        u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &status);
471        if (U_FAILURE(status)) {
472          log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
473                  (long)text.number, u_errorName(status));
474        } else {
475          if (text.number == 0) {
476            sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
477          }
478          if (U_SUCCESS(status)) {
479            /* UTF-16 with length */
480            verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
481            verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
482            /* UTF-16 NUL-terminated */
483            verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
484            verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
485          }
486        }
487
488        uprv_free(manual_rt);
489        uprv_free(manual_fb);
490      }
491      ucnvsel_close(sel_rt);
492      ucnvsel_close(sel_fb);
493      uprv_free(buffer_fb);
494    }
495    uprv_free((void *)encodings);
496  }
497
498  releaseAvailableNames();
499  text_close(&text);
500  for(i = 0 ; i < 3 ; i++) {
501    uset_close(excluded_sets[i]);
502  }
503}
504
505/* Improve code coverage of UPropsVectors */
506static void TestUPropsVector() {
507    UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR;
508    UPropsVectors *pv = upvec_open(100, &errorCode);
509    if (pv != NULL) {
510        log_err("Should have returned NULL if UErrorCode is an error.");
511        return;
512    }
513    errorCode = U_ZERO_ERROR;
514    pv = upvec_open(-1, &errorCode);
515    if (pv != NULL || U_SUCCESS(errorCode)) {
516        log_err("Should have returned NULL if column is less than 0.\n");
517        return;
518    }
519    errorCode = U_ZERO_ERROR;
520    pv = upvec_open(100, &errorCode);
521    if (pv == NULL || U_FAILURE(errorCode)) {
522        log_err("Unable to open UPropsVectors.\n");
523        return;
524    }
525
526    if (upvec_getValue(pv, 0, 1) != 0) {
527        log_err("upvec_getValue should return 0.\n");
528    }
529    if (upvec_getRow(pv, 0, NULL, NULL) == NULL) {
530        log_err("upvec_getRow should not return NULL.\n");
531    }
532    if (upvec_getArray(pv, NULL, NULL) != NULL) {
533        log_err("upvec_getArray should return NULL.\n");
534    }
535
536    upvec_close(pv);
537}
538