1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7#include "unicode/utypes.h"
8#include "unicode/uchar.h"
9#include "unicode/normlzr.h"
10#include "unicode/uniset.h"
11#include "unicode/usetiter.h"
12#include "unicode/schriter.h"
13#include "tstnorm.h"
14
15#if !UCONFIG_NO_NORMALIZATION
16
17static UErrorCode status = U_ZERO_ERROR;
18
19// test APIs that are not otherwise used - improve test coverage
20void
21BasicNormalizerTest::TestNormalizerAPI() {
22    // instantiate a Normalizer from a CharacterIterator
23    UnicodeString s=UnicodeString("a\\u0308\\uac00\\U0002f800", "").unescape();
24    s.append(s); // make s a bit longer and more interesting
25    StringCharacterIterator iter(s);
26    Normalizer norm(iter, UNORM_NFC);
27    if(norm.next()!=0xe4) {
28        dataerrln("error in Normalizer(CharacterIterator).next()");
29    }
30
31    // test copy constructor
32    Normalizer copy(norm);
33    if(copy.next()!=0xac00) {
34        dataerrln("error in Normalizer(Normalizer(CharacterIterator)).next()");
35    }
36
37    // test clone(), ==, and hashCode()
38    Normalizer *clone=copy.clone();
39    if(*clone!=copy) {
40        errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=copy");
41    }
42    // clone must have the same hashCode()
43    if(clone->hashCode()!=copy.hashCode()) {
44        errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->hashCode()!=copy.hashCode()");
45    }
46    if(clone->next()!=0x4e3d) {
47        dataerrln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next()");
48    }
49    // position changed, must change hashCode()
50    if(clone->hashCode()==copy.hashCode()) {
51        errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next().hashCode()==copy.hashCode()");
52    }
53    delete clone;
54    clone=0;
55
56    // test compose() and decompose()
57    UnicodeString tel, nfkc, nfkd;
58    tel=UnicodeString(1, (UChar32)0x2121, 10);
59    tel.insert(1, (UChar)0x301);
60
61    UErrorCode errorCode=U_ZERO_ERROR;
62    Normalizer::compose(tel, TRUE, 0, nfkc, errorCode);
63    Normalizer::decompose(tel, TRUE, 0, nfkd, errorCode);
64    if(U_FAILURE(errorCode)) {
65        dataerrln("error in Normalizer::(de)compose(): %s", u_errorName(errorCode));
66    } else if(
67        nfkc!=UnicodeString("TE\\u0139TELTELTELTELTELTELTELTELTEL", "").unescape() ||
68        nfkd!=UnicodeString("TEL\\u0301TELTELTELTELTELTELTELTELTEL", "").unescape()
69    ) {
70        errln("error in Normalizer::(de)compose(): wrong result(s)");
71    }
72
73    // test setIndex()
74    norm.setIndexOnly(3);
75    if(norm.current()!=0x4e3d) {
76        dataerrln("error in Normalizer(CharacterIterator).setIndex(3)");
77    }
78
79    // test setText(CharacterIterator) and getText()
80    UnicodeString out, out2;
81    errorCode=U_ZERO_ERROR;
82    copy.setText(iter, errorCode);
83    if(U_FAILURE(errorCode)) {
84        errln("error Normalizer::setText() failed: %s", u_errorName(errorCode));
85    } else {
86        copy.getText(out);
87        iter.getText(out2);
88        if( out!=out2 ||
89            copy.startIndex()!=iter.startIndex() ||
90            copy.endIndex()!=iter.endIndex()
91        ) {
92            errln("error in Normalizer::setText() or Normalizer::getText()");
93        }
94    }
95
96    // test setText(UChar *), getUMode() and setMode()
97    errorCode=U_ZERO_ERROR;
98    copy.setText(s.getBuffer()+1, s.length()-1, errorCode);
99    copy.setMode(UNORM_NFD);
100    if(copy.getUMode()!=UNORM_NFD) {
101        errln("error in Normalizer::setMode() or Normalizer::getUMode()");
102    }
103    if(copy.next()!=0x308 || copy.next()!=0x1100) {
104        dataerrln("error in Normalizer::setText(UChar *) or Normalizer::setMode()");
105    }
106
107    // test setText(UChar *, length=-1)
108    errorCode=U_ZERO_ERROR;
109
110    // NUL-terminate s
111    s.append((UChar)0);         // append NUL
112    s.truncate(s.length()-1);   // undo length change
113
114    copy.setText(s.getBuffer()+1, -1, errorCode);
115    if(copy.endIndex()!=s.length()-1) {
116        errln("error in Normalizer::setText(UChar *, -1)");
117    }
118
119    // test setOption() and getOption()
120    copy.setOption(0xaa0000, TRUE);
121    copy.setOption(0x20000, FALSE);
122    if(!copy.getOption(0x880000) || copy.getOption(0x20000)) {
123        errln("error in Normalizer::setOption() or Normalizer::getOption()");
124    }
125
126    // test last()/previous() with an internal buffer overflow
127    errorCode=U_ZERO_ERROR;
128    copy.setText(UnicodeString(1000, (UChar32)0x308, 1000), errorCode);
129    if(copy.last()!=0x308) {
130        errln("error in Normalizer(1000*U+0308).last()");
131    }
132
133    // test UNORM_NONE
134    norm.setMode(UNORM_NONE);
135    if(norm.first()!=0x61 || norm.next()!=0x308 || norm.last()!=0x2f800) {
136        errln("error in Normalizer(UNORM_NONE).first()/next()/last()");
137    }
138    Normalizer::normalize(s, UNORM_NONE, 0, out, status);
139    if(out!=s) {
140        errln("error in Normalizer::normalize(UNORM_NONE)");
141    }
142
143    // test that the same string can be used as source and destination
144    s.setTo((UChar)0xe4);
145    Normalizer::normalize(s, UNORM_NFD, 0, s, status);
146    if(s.charAt(1)!=0x308) {
147        dataerrln("error in Normalizer::normalize(UNORM_NFD, self)");
148    }
149    Normalizer::normalize(s, UNORM_NFC, 0, s, status);
150    if(s.charAt(0)!=0xe4) {
151        dataerrln("error in Normalizer::normalize(UNORM_NFC, self)");
152    }
153    Normalizer::decompose(s, FALSE, 0, s, status);
154    if(s.charAt(1)!=0x308) {
155        dataerrln("error in Normalizer::decompose(self)");
156    }
157    Normalizer::compose(s, FALSE, 0, s, status);
158    if(s.charAt(0)!=0xe4) {
159        dataerrln("error in Normalizer::compose(self)");
160    }
161    Normalizer::concatenate(s, s, s, UNORM_NFC, 0, status);
162    if(s.charAt(1)!=0xe4) {
163        dataerrln("error in Normalizer::decompose(self)");
164    }
165}
166
167#endif
168