/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/normlzr.h"
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h"
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/usetiter.h"
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/schriter.h"
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "tstnorm.h"
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UErrorCode status = U_ZERO_ERROR;
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// test APIs that are not otherwise used - improve test coverage
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruBasicNormalizerTest::TestNormalizerAPI() {
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // instantiate a Normalizer from a CharacterIterator
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString s=UnicodeString("a\\u0308\\uac00\\U0002f800", "").unescape();
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.append(s); // make s a bit longer and more interesting
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    StringCharacterIterator iter(s);
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer norm(iter, UNORM_NFC);
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(norm.next()!=0xe4) {
2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer(CharacterIterator).next()");
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test copy constructor
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer copy(norm);
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(copy.next()!=0xac00) {
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer(Normalizer(CharacterIterator)).next()");
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test clone(), ==, and hashCode()
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer *clone=copy.clone();
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(*clone!=copy) {
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=copy");
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // clone must have the same hashCode()
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(clone->hashCode()!=copy.hashCode()) {
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->hashCode()!=copy.hashCode()");
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(clone->next()!=0x4e3d) {
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next()");
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // position changed, must change hashCode()
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(clone->hashCode()==copy.hashCode()) {
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next().hashCode()==copy.hashCode()");
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete clone;
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    clone=0;
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test compose() and decompose()
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString tel, nfkc, nfkd;
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    tel=UnicodeString(1, (UChar32)0x2121, 10);
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    tel.insert(1, (UChar)0x301);
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer::compose(tel, TRUE, 0, nfkc, errorCode);
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer::decompose(tel, TRUE, 0, nfkd, errorCode);
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer::(de)compose(): %s", u_errorName(errorCode));
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        nfkc!=UnicodeString("TE\\u0139TELTELTELTELTELTELTELTELTEL", "").unescape() ||
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        nfkd!=UnicodeString("TEL\\u0301TELTELTELTELTELTELTELTELTEL", "").unescape()
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error in Normalizer::(de)compose(): wrong result(s)");
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test setIndex()
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    norm.setIndexOnly(3);
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(norm.current()!=0x4e3d) {
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer(CharacterIterator).setIndex(3)");
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test setText(CharacterIterator) and getText()
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString out, out2;
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copy.setText(iter, errorCode);
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error Normalizer::setText() failed: %s", u_errorName(errorCode));
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        copy.getText(out);
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        iter.getText(out2);
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if( out!=out2 ||
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            copy.startIndex()!=iter.startIndex() ||
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            copy.endIndex()!=iter.endIndex()
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ) {
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("error in Normalizer::setText() or Normalizer::getText()");
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test setText(UChar *), getUMode() and setMode()
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copy.setText(s.getBuffer()+1, s.length()-1, errorCode);
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copy.setMode(UNORM_NFD);
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(copy.getUMode()!=UNORM_NFD) {
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error in Normalizer::setMode() or Normalizer::getUMode()");
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(copy.next()!=0x308 || copy.next()!=0x1100) {
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer::setText(UChar *) or Normalizer::setMode()");
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test setText(UChar *, length=-1)
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // NUL-terminate s
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.append((UChar)0);         // append NUL
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.truncate(s.length()-1);   // undo length change
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copy.setText(s.getBuffer()+1, -1, errorCode);
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(copy.endIndex()!=s.length()-1) {
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error in Normalizer::setText(UChar *, -1)");
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test setOption() and getOption()
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copy.setOption(0xaa0000, TRUE);
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copy.setOption(0x20000, FALSE);
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!copy.getOption(0x880000) || copy.getOption(0x20000)) {
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error in Normalizer::setOption() or Normalizer::getOption()");
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test last()/previous() with an internal buffer overflow
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copy.setText(UnicodeString(1000, (UChar32)0x308, 1000), errorCode);
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(copy.last()!=0x308) {
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error in Normalizer(1000*U+0308).last()");
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test UNORM_NONE
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    norm.setMode(UNORM_NONE);
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(norm.first()!=0x61 || norm.next()!=0x308 || norm.last()!=0x2f800) {
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error in Normalizer(UNORM_NONE).first()/next()/last()");
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer::normalize(s, UNORM_NONE, 0, out, status);
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(out!=s) {
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("error in Normalizer::normalize(UNORM_NONE)");
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test that the same string can be used as source and destination
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.setTo((UChar)0xe4);
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer::normalize(s, UNORM_NFD, 0, s, status);
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(s.charAt(1)!=0x308) {
14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer::normalize(UNORM_NFD, self)");
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer::normalize(s, UNORM_NFC, 0, s, status);
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(s.charAt(0)!=0xe4) {
15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer::normalize(UNORM_NFC, self)");
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer::decompose(s, FALSE, 0, s, status);
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(s.charAt(1)!=0x308) {
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer::decompose(self)");
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer::compose(s, FALSE, 0, s, status);
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(s.charAt(0)!=0xe4) {
15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer::compose(self)");
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Normalizer::concatenate(s, s, s, UNORM_NFC, 0, status);
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(s.charAt(1)!=0xe4) {
16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataerrln("error in Normalizer::decompose(self)");
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
168