1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius*   Copyright (C) 2001-2011, International Business Machines
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   file name:  casetrn.cpp
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   encoding:   US-ASCII
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   tab size:   8 (not used)
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   indentation:4
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created on: 2004sep03
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created by: Markus W. Scherer
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Implementation class for lower-/upper-/title-casing transliterators.
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uchar.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ustring.h"
2583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf.h"
2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "tolowtrn.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucase.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cpputils.h"
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* case context iterator using a Replaceable */
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UChar32 U_CALLCONV
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutrans_rep_caseContextIterator(void *context, int8_t dir)
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_NAMESPACE_USE
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCaseContext *csc=(UCaseContext *)context;
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Replaceable *rep=(Replaceable *)csc->p;
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(dir<0) {
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* reset for backward iteration */
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        csc->index=csc->cpStart;
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        csc->dir=dir;
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(dir>0) {
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* reset for forward iteration */
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        csc->index=csc->cpLimit;
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        csc->dir=dir;
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* continue current iteration direction */
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        dir=csc->dir;
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // automatically adjust start and limit if the Replaceable disagrees
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // with the original values
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(dir<0) {
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(csc->start<csc->index) {
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=rep->char32At(csc->index-1);
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c<0) {
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                csc->start=csc->index;
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                csc->index-=U16_LENGTH(c);
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return c;
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // detect, and store in csc->b1, if we hit the limit
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(csc->index<csc->limit) {
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=rep->char32At(csc->index);
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c<0) {
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                csc->limit=csc->index;
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                csc->b1=TRUE;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                csc->index+=U16_LENGTH(c);
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return c;
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            csc->b1=TRUE;
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return U_SENTINEL;
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
8685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoUOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator)
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a transliterator.
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) :
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(id, 0),
9327f654740f2a26ad62a5c155af9199af9e69b889claireho    fCsp(ucase_getSingleton()),
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fMap(map)
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // TODO need to call setMaximumContextLength()?!
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor.
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCaseMapTransliterator::~CaseMapTransliterator() {
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor.
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(o),
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCsp(o.fCsp), fMap(o.fMap)
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assignment operator.
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator::operator=(o);
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCsp = o.fCsp;
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fMap = o.fMap;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return *this;
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator API.
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
12885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*Transliterator* CaseMapTransliterator::clone(void) const {
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new CaseMapTransliterator(*this);
13085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}*/
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}.
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CaseMapTransliterator::handleTransliterate(Replaceable& text,
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 UTransPosition& offsets,
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 UBool isIncremental) const
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (offsets.start >= offsets.limit) {
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCaseContext csc;
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_memset(&csc, 0, sizeof(csc));
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    csc.p = &text;
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    csc.start = offsets.contextStart;
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    csc.limit = offsets.contextLimit;
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString tmp;
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *s;
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t textPos, delta, result, locCache=0;
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(textPos=offsets.start; textPos<offsets.limit;) {
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        csc.cpStart=textPos;
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c=text.char32At(textPos);
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        csc.cpLimit=textPos+=U16_LENGTH(c);
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(csc.b1 && isIncremental) {
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // fMap() tried to look beyond the context limit
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // wait for more input
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            offsets.start=csc.cpStart;
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return;
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(result>=0) {
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // replace the current code point with its full case mapping result
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // see UCASE_MAX_STRING_LENGTH
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(result<=UCASE_MAX_STRING_LENGTH) {
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // string s[result]
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                tmp.setTo(FALSE, s, result);
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                delta=result-U16_LENGTH(c);
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // single code point
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                tmp.setTo(result);
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                delta=tmp.length()-U16_LENGTH(c);
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            text.handleReplaceBetween(csc.cpStart, textPos, tmp);
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(delta!=0) {
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                textPos+=delta;
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                csc.limit=offsets.contextLimit+=delta;
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets.limit+=delta;
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offsets.start=textPos;
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
194