1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/******************************************************************** 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT: 350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Copyright (c) 1997-2010, International Business Machines Corporation and 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************/ 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/normlzr.h" 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/usetiter.h" 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/schriter.h" 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "tstnorm.h" 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UErrorCode status = U_ZERO_ERROR; 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// test APIs that are not otherwise used - improve test coverage 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruBasicNormalizerTest::TestNormalizerAPI() { 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // instantiate a Normalizer from a CharacterIterator 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s=UnicodeString("a\\u0308\\uac00\\U0002f800", "").unescape(); 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.append(s); // make s a bit longer and more interesting 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru StringCharacterIterator iter(s); 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer norm(iter, UNORM_NFC); 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(norm.next()!=0xe4) { 2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer(CharacterIterator).next()"); 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test copy constructor 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer copy(norm); 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(copy.next()!=0xac00) { 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer(Normalizer(CharacterIterator)).next()"); 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test clone(), ==, and hashCode() 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer *clone=copy.clone(); 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*clone!=copy) { 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=copy"); 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // clone must have the same hashCode() 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(clone->hashCode()!=copy.hashCode()) { 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->hashCode()!=copy.hashCode()"); 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(clone->next()!=0x4e3d) { 4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next()"); 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // position changed, must change hashCode() 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(clone->hashCode()==copy.hashCode()) { 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next().hashCode()==copy.hashCode()"); 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete clone; 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clone=0; 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test compose() and decompose() 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString tel, nfkc, nfkd; 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tel=UnicodeString(1, (UChar32)0x2121, 10); 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tel.insert(1, (UChar)0x301); 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::compose(tel, TRUE, 0, nfkc, errorCode); 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::decompose(tel, TRUE, 0, nfkd, errorCode); 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer::(de)compose(): %s", u_errorName(errorCode)); 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if( 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nfkc!=UnicodeString("TE\\u0139TELTELTELTELTELTELTELTELTEL", "").unescape() || 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nfkd!=UnicodeString("TEL\\u0301TELTELTELTELTELTELTELTELTEL", "").unescape() 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer::(de)compose(): wrong result(s)"); 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test setIndex() 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru norm.setIndexOnly(3); 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(norm.current()!=0x4e3d) { 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer(CharacterIterator).setIndex(3)"); 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test setText(CharacterIterator) and getText() 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString out, out2; 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copy.setText(iter, errorCode); 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error Normalizer::setText() failed: %s", u_errorName(errorCode)); 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copy.getText(out); 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.getText(out2); 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( out!=out2 || 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copy.startIndex()!=iter.startIndex() || 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copy.endIndex()!=iter.endIndex() 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer::setText() or Normalizer::getText()"); 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test setText(UChar *), getUMode() and setMode() 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copy.setText(s.getBuffer()+1, s.length()-1, errorCode); 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copy.setMode(UNORM_NFD); 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(copy.getUMode()!=UNORM_NFD) { 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer::setMode() or Normalizer::getUMode()"); 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(copy.next()!=0x308 || copy.next()!=0x1100) { 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer::setText(UChar *) or Normalizer::setMode()"); 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test setText(UChar *, length=-1) 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NUL-terminate s 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.append((UChar)0); // append NUL 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.truncate(s.length()-1); // undo length change 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copy.setText(s.getBuffer()+1, -1, errorCode); 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(copy.endIndex()!=s.length()-1) { 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer::setText(UChar *, -1)"); 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test setOption() and getOption() 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copy.setOption(0xaa0000, TRUE); 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copy.setOption(0x20000, FALSE); 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!copy.getOption(0x880000) || copy.getOption(0x20000)) { 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer::setOption() or Normalizer::getOption()"); 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test last()/previous() with an internal buffer overflow 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copy.setText(UnicodeString(1000, (UChar32)0x308, 1000), errorCode); 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(copy.last()!=0x308) { 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer(1000*U+0308).last()"); 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test UNORM_NONE 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru norm.setMode(UNORM_NONE); 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(norm.first()!=0x61 || norm.next()!=0x308 || norm.last()!=0x2f800) { 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer(UNORM_NONE).first()/next()/last()"); 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(s, UNORM_NONE, 0, out, status); 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(out!=s) { 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer::normalize(UNORM_NONE)"); 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test that the same string can be used as source and destination 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.setTo((UChar)0xe4); 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(s, UNORM_NFD, 0, s, status); 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s.charAt(1)!=0x308) { 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer::normalize(UNORM_NFD, self)"); 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(s, UNORM_NFC, 0, s, status); 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s.charAt(0)!=0xe4) { 15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer::normalize(UNORM_NFC, self)"); 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::decompose(s, FALSE, 0, s, status); 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s.charAt(1)!=0x308) { 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer::decompose(self)"); 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::compose(s, FALSE, 0, s, status); 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s.charAt(0)!=0xe4) { 15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer::compose(self)"); 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::concatenate(s, s, s, UNORM_NFC, 0, status); 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s.charAt(1)!=0xe4) { 16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer::decompose(self)"); 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 168