1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 2001-2011, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: casetrn.cpp 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2004sep03 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Implementation class for lower-/upper-/title-casing transliterators. 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uchar.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ustring.h" 2583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf.h" 2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "tolowtrn.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucase.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cpputils.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* case context iterator using a Replaceable */ 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UChar32 U_CALLCONV 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutrans_rep_caseContextIterator(void *context, int8_t dir) 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_NAMESPACE_USE 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCaseContext *csc=(UCaseContext *)context; 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Replaceable *rep=(Replaceable *)csc->p; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(dir<0) { 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset for backward iteration */ 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc->index=csc->cpStart; 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc->dir=dir; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(dir>0) { 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset for forward iteration */ 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc->index=csc->cpLimit; 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc->dir=dir; 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue current iteration direction */ 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dir=csc->dir; 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // automatically adjust start and limit if the Replaceable disagrees 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // with the original values 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(dir<0) { 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(csc->start<csc->index) { 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=rep->char32At(csc->index-1); 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0) { 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc->start=csc->index; 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc->index-=U16_LENGTH(c); 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // detect, and store in csc->b1, if we hit the limit 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(csc->index<csc->limit) { 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=rep->char32At(csc->index); 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0) { 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc->limit=csc->index; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc->b1=TRUE; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc->index+=U16_LENGTH(c); 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc->b1=TRUE; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoUOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator) 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a transliterator. 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) : 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, 0), 9327f654740f2a26ad62a5c155af9199af9e69b889claireho fCsp(ucase_getSingleton()), 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fMap(map) 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // TODO test incremental mode with context-sensitive text (e.g. greek sigma) 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // TODO need to call setMaximumContextLength()?! 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor. 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCaseMapTransliterator::~CaseMapTransliterator() { 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) : 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(o), 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCsp(o.fCsp), fMap(o.fMap) 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assignment operator. 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) { 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::operator=(o); 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCsp = o.fCsp; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fMap = o.fMap; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *this; 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator API. 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 12885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*Transliterator* CaseMapTransliterator::clone(void) const { 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new CaseMapTransliterator(*this); 13085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}*/ 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}. 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CaseMapTransliterator::handleTransliterate(Replaceable& text, 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransPosition& offsets, 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isIncremental) const 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offsets.start >= offsets.limit) { 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCaseContext csc; 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(&csc, 0, sizeof(csc)); 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc.p = &text; 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc.start = offsets.contextStart; 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc.limit = offsets.contextLimit; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString tmp; 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *s; 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t textPos, delta, result, locCache=0; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(textPos=offsets.start; textPos<offsets.limit;) { 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc.cpStart=textPos; 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=text.char32At(textPos); 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc.cpLimit=textPos+=U16_LENGTH(c); 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache); 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(csc.b1 && isIncremental) { 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // fMap() tried to look beyond the context limit 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // wait for more input 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.start=csc.cpStart; 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(result>=0) { 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // replace the current code point with its full case mapping result 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // see UCASE_MAX_STRING_LENGTH 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(result<=UCASE_MAX_STRING_LENGTH) { 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // string s[result] 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tmp.setTo(FALSE, s, result); 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delta=result-U16_LENGTH(c); 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // single code point 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tmp.setTo(result); 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delta=tmp.length()-U16_LENGTH(c); 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.handleReplaceBetween(csc.cpStart, textPos, tmp); 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(delta!=0) { 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textPos+=delta; 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csc.limit=offsets.contextLimit+=delta; 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.limit+=delta; 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets.start=textPos; 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 194