1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius*   Copyright (C) 2002-2012, International Business Machines
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   file name:  uiter.cpp
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   encoding:   US-ASCII
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   tab size:   8 (not used)
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   indentation:4
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created on: 2002jan18
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created by: Markus W. Scherer
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ustring.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/chariter.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/rep.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uiter.h"
22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf.h"
23103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf8.h"
24103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_USE
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Evaluates to nonzero when the lowest bit of the integer n is clear, i.e. n is even. */
#define IS_EVEN(n) (((n)&1)==0)
/*
 * Evaluates to nonzero when the address held in pointer p is even.
 * The argument is parenthesized before the cast so that compound
 * expressions (for example a conditional expression) are converted as a whole.
 */
#define IS_POINTER_EVEN(p) IS_EVEN((size_t)(p))
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* No-Op UCharIterator implementation for illegal input --------------------- */
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopGetIndex(UCharIterator * /*iter*/, UCharIteratorOrigin /*origin*/) {
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0;
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopMove(UCharIterator * /*iter*/, int32_t /*delta*/, UCharIteratorOrigin /*origin*/) {
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0;
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopHasNext(UCharIterator * /*iter*/) {
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return FALSE;
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopCurrent(UCharIterator * /*iter*/) {
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return U_SENTINEL;
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t U_CALLCONV
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopGetState(const UCharIterator * /*iter*/) {
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return UITER_NO_STATE;
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) {
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *pErrorCode=U_UNSUPPORTED_ERROR;
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator noopIterator={
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0, 0, 0, 0, 0, 0,
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    noopGetIndex,
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    noopMove,
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    noopHasNext,
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    noopHasNext,
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    noopCurrent,
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    noopCurrent,
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    noopCurrent,
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    noopGetState,
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    noopSetState
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UCharIterator implementation for simple strings -------------------------- */
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is an implementation of a code unit (UChar) iterator
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for UChar * strings.
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UCharIterator.context field holds a pointer to the string.
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch(origin) {
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_ZERO:
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_START:
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return iter->start;
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_CURRENT:
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return iter->index;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LIMIT:
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return iter->limit;
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LENGTH:
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return iter->length;
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    default:
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* not a valid origin */
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* Should never get here! */
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1;
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t pos;
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch(origin) {
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_ZERO:
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos=delta;
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_START:
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos=iter->start+delta;
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_CURRENT:
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos=iter->index+delta;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LIMIT:
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos=iter->limit+delta;
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LENGTH:
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos=iter->length+delta;
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    default:
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1;  /* Error */
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(pos<iter->start) {
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos=iter->start;
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(pos>iter->limit) {
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos=iter->limit;
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return iter->index=pos;
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorHasNext(UCharIterator *iter) {
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return iter->index<iter->limit;
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorHasPrevious(UCharIterator *iter) {
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return iter->index>iter->start;
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorCurrent(UCharIterator *iter) {
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->index<iter->limit) {
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((const UChar *)(iter->context))[iter->index];
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorNext(UCharIterator *iter) {
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->index<iter->limit) {
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((const UChar *)(iter->context))[iter->index++];
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorPrevious(UCharIterator *iter) {
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->index>iter->start) {
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((const UChar *)(iter->context))[--iter->index];
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t U_CALLCONV
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorGetState(const UCharIterator *iter) {
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (uint32_t)iter->index;
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* do nothing */
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(iter==NULL) {
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if((int32_t)state<iter->start || iter->limit<(int32_t)state) {
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        iter->index=(int32_t)state;
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator stringIterator={
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0, 0, 0, 0, 0, 0,
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorGetIndex,
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorMove,
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorHasNext,
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorHasPrevious,
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorCurrent,
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorNext,
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorPrevious,
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorGetState,
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorSetState
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setString(UCharIterator *iter, const UChar *s, int32_t length) {
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter!=0) {
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(s!=0 && length>=-1) {
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *iter=stringIterator;
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->context=s;
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(length>=0) {
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->length=length;
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->length=u_strlen(s);
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->limit=iter->length;
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *iter=noopIterator;
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UCharIterator implementation for UTF-16BE strings ------------------------ */
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is an implementation of a code unit (UChar) iterator
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for UTF-16BE strings, i.e., strings in byte-vectors where
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * each UChar is stored as a big-endian pair of bytes.
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UCharIterator.context field holds a pointer to the string.
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Everything works just like with a normal UChar iterator (uiter_setString),
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * except that UChars are assembled from byte pairs.
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* internal helper function */
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic inline UChar32
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf16BEIteratorGet(UCharIterator *iter, int32_t index) {
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *p=(const uint8_t *)iter->context;
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf16BEIteratorCurrent(UCharIterator *iter) {
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t index;
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if((index=iter->index)<iter->limit) {
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return utf16BEIteratorGet(iter, index);
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf16BEIteratorNext(UCharIterator *iter) {
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t index;
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if((index=iter->index)<iter->limit) {
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        iter->index=index+1;
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return utf16BEIteratorGet(iter, index);
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf16BEIteratorPrevious(UCharIterator *iter) {
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t index;
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if((index=iter->index)>iter->start) {
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        iter->index=--index;
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return utf16BEIteratorGet(iter, index);
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator utf16BEIterator={
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0, 0, 0, 0, 0, 0,
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorGetIndex,
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorMove,
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorHasNext,
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorHasPrevious,
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf16BEIteratorCurrent,
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf16BEIteratorNext,
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf16BEIteratorPrevious,
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorGetState,
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorSetState
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * i.e., before a pair of 0 bytes where the first 0 byte is at an even
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offset from s.
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf16BE_strlen(const char *s) {
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(IS_POINTER_EVEN(s)) {
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * even-aligned, call u_strlen(s)
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * we are probably on a little-endian machine, but searching for UChar NUL
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * does not care about endianness
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return u_strlen((const UChar *)s);
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* odd-aligned, search for pair of 0 bytes */
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const char *p=s;
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(!(*p==0 && p[1]==0)) {
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            p+=2;
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return (int32_t)((p-s)/2);
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length) {
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter!=NULL) {
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* allow only even-length strings (the input length counts bytes) */
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(s!=NULL && (length==-1 || (length>=0 && IS_EVEN(length)))) {
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            length>>=1;
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s)) {
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                uiter_setString(iter, (const UChar *)s, length);
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return;
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *iter=utf16BEIterator;
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->context=s;
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(length>=0) {
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->length=length;
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->length=utf16BE_strlen(s);
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->limit=iter->length;
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *iter=noopIterator;
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UCharIterator wrapper around CharacterIterator --------------------------- */
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is wrapper code around a C++ CharacterIterator to
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * look like a C UCharIterator.
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UCharIterator.context field holds a pointer to the CharacterIterator.
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch(origin) {
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_ZERO:
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_START:
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((CharacterIterator *)(iter->context))->startIndex();
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_CURRENT:
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((CharacterIterator *)(iter->context))->getIndex();
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LIMIT:
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((CharacterIterator *)(iter->context))->endIndex();
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LENGTH:
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((CharacterIterator *)(iter->context))->getLength();
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    default:
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* not a valid origin */
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* Should never get here! */
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1;
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch(origin) {
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_ZERO:
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ((CharacterIterator *)(iter->context))->setIndex(delta);
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((CharacterIterator *)(iter->context))->getIndex();
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_START:
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_CURRENT:
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LIMIT:
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((CharacterIterator *)(iter->context))->move(delta, (CharacterIterator::EOrigin)origin);
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LENGTH:
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ((CharacterIterator *)(iter->context))->setIndex(((CharacterIterator *)(iter->context))->getLength()+delta);
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((CharacterIterator *)(iter->context))->getIndex();
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    default:
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* not a valid origin */
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* Should never get here! */
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1;
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorHasNext(UCharIterator *iter) {
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ((CharacterIterator *)(iter->context))->hasNext();
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorHasPrevious(UCharIterator *iter) {
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ((CharacterIterator *)(iter->context))->hasPrevious();
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorCurrent(UCharIterator *iter) {
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=((CharacterIterator *)(iter->context))->current();
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c!=0xffff || ((CharacterIterator *)(iter->context))->hasNext()) {
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return c;
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorNext(UCharIterator *iter) {
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(((CharacterIterator *)(iter->context))->hasNext()) {
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((CharacterIterator *)(iter->context))->nextPostInc();
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorPrevious(UCharIterator *iter) {
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(((CharacterIterator *)(iter->context))->hasPrevious()) {
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((CharacterIterator *)(iter->context))->previous();
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t U_CALLCONV
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorGetState(const UCharIterator *iter) {
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ((CharacterIterator *)(iter->context))->getIndex();
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* do nothing */
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(iter==NULL || iter->context==NULL) {
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if((int32_t)state<((CharacterIterator *)(iter->context))->startIndex() || ((CharacterIterator *)(iter->context))->endIndex()<(int32_t)state) {
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ((CharacterIterator *)(iter->context))->setIndex((int32_t)state);
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator characterIteratorWrapper={
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0, 0, 0, 0, 0, 0,
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    characterIteratorGetIndex,
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    characterIteratorMove,
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    characterIteratorHasNext,
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    characterIteratorHasPrevious,
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    characterIteratorCurrent,
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    characterIteratorNext,
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    characterIteratorPrevious,
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    characterIteratorGetState,
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    characterIteratorSetState
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) {
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter!=0) {
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(charIter!=0) {
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *iter=characterIteratorWrapper;
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->context=charIter;
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *iter=noopIterator;
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UCharIterator wrapper around Replaceable --------------------------------- */
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is an implementation of a code unit (UChar) iterator
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * based on a Replaceable object.
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UCharIterator.context field holds a pointer to the Replaceable.
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UCharIterator.length and UCharIterator.index hold Replaceable.length()
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and the iteration index.
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerureplaceableIteratorCurrent(UCharIterator *iter) {
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->index<iter->limit) {
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((Replaceable *)(iter->context))->charAt(iter->index);
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerureplaceableIteratorNext(UCharIterator *iter) {
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->index<iter->limit) {
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((Replaceable *)(iter->context))->charAt(iter->index++);
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerureplaceableIteratorPrevious(UCharIterator *iter) {
514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->index>iter->start) {
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ((Replaceable *)(iter->context))->charAt(--iter->index);
516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator replaceableIterator={
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0, 0, 0, 0, 0, 0,
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorGetIndex,
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorMove,
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorHasNext,
526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorHasPrevious,
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    replaceableIteratorCurrent,
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    replaceableIteratorNext,
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    replaceableIteratorPrevious,
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorGetState,
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stringIteratorSetState
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) {
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter!=0) {
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(rep!=0) {
539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *iter=replaceableIterator;
540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->context=rep;
541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->limit=iter->length=rep->length();
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *iter=noopIterator;
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UCharIterator implementation for UTF-8 strings --------------------------- */
549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Possible, probably necessary only for an implementation for arbitrary
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * converters:
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.
 * This would require turning reservedFn into a close function and
 * introducing a uiter_close(iter).
556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UITER_CNV_CAPACITY 16
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Minimal implementation:
562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Maintain a single-UChar buffer for an additional surrogate.
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The caller must not modify start and limit because they are used internally.
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Use UCharIterator fields as follows:
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   context        pointer to UTF-8 string
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   length         UTF-16 length of the string; -1 until lazy evaluation
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   start          current UTF-8 index
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   index          current UTF-16 index; may be -1="unknown" after setState()
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   limit          UTF-8 length of the string
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   reservedField  supplementary code point
572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Since UCharIterator delivers 16-bit code units, the iteration can be
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * currently in the middle of the byte sequence for a supplementary code point.
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In this case, reservedField will contain that code point and start will
576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * point to after the corresponding byte sequence. The UTF-16 index will be
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * one less than what it would otherwise be corresponding to the UTF-8 index.
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Otherwise, reservedField will be 0.
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Add implementations that do not call strlen() for iteration but check for NUL.
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch(origin) {
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_ZERO:
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_START:
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_CURRENT:
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(iter->index<0) {
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* the current UTF-16 index is unknown after setState(), count from the beginning */
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            const uint8_t *s;
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UChar32 c;
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t i, limit, index;
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            s=(const uint8_t *)iter->context;
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            i=index=0;
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            limit=iter->start; /* count up to the UTF-8 index */
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while(i<limit) {
6038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                U8_NEXT_OR_FFFD(s, i, limit, c);
6048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                index+=U16_LENGTH(c);
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->start=i; /* just in case setState() did not get us to a code point boundary */
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(i==iter->limit) {
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->length=index; /* in case it was <0 or wrong */
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(iter->reservedField!=0) {
612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --index; /* we are in the middle of a supplementary code point */
613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=index;
615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return iter->index;
617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LIMIT:
618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LENGTH:
619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(iter->length<0) {
620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            const uint8_t *s;
621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UChar32 c;
622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t i, limit, length;
623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            s=(const uint8_t *)iter->context;
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(iter->index<0) {
626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /*
627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * the current UTF-16 index is unknown after setState(),
628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * we must first count from the beginning to here
629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 */
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                i=length=0;
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                limit=iter->start;
632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* count from the beginning to the current index */
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                while(i<limit) {
6358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    U8_NEXT_OR_FFFD(s, i, limit, c);
6368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    length+=U16_LENGTH(c);
637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* assume i==limit==iter->start, set the UTF-16 index */
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->start=i; /* just in case setState() did not get us to a code point boundary */
641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->index= iter->reservedField!=0 ? length-1 : length;
642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                i=iter->start;
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                length=iter->index;
645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(iter->reservedField!=0) {
646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ++length;
647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* count from the current index to the end */
651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            limit=iter->limit;
652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while(i<limit) {
6538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                U8_NEXT_OR_FFFD(s, i, limit, c);
6548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                length+=U16_LENGTH(c);
655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->length=length;
657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return iter->length;
659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    default:
660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* not a valid origin */
661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* Should never get here! */
662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1;
663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV
667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *s;
669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t pos; /* requested UTF-16 index */
671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i; /* UTF-8 index */
672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool havePos;
673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* calculate the requested UTF-16 index */
675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch(origin) {
676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_ZERO:
677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_START:
678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos=delta;
679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        havePos=TRUE;
680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* iter->index<0 (unknown) is possible */
681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_CURRENT:
683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(iter->index>=0) {
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pos=iter->index+delta;
685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            havePos=TRUE;
686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* the current UTF-16 index is unknown after setState(), use only delta */
688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pos=0;
689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            havePos=FALSE;
690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LIMIT:
693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UITER_LENGTH:
694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(iter->length>=0) {
695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pos=iter->length+delta;
696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            havePos=TRUE;
697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* pin to the end, avoid counting the length */
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=-1;
700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->start=iter->limit;
701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->reservedField=0;
702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(delta>=0) {
703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return UITER_UNKNOWN_INDEX;
704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* the current UTF-16 index is unknown, use only delta */
706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pos=0;
707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                havePos=FALSE;
708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    default:
712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1;  /* Error */
713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(havePos) {
716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* shortcuts: pinning to the edges of the string */
717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(pos<=0) {
718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=iter->start=iter->reservedField=0;
719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0;
720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(iter->length>=0 && pos>=iter->length) {
721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=iter->length;
722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->start=iter->limit;
723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->reservedField=0;
724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return iter->index;
725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* minimize the number of U8_NEXT/PREV operations */
728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(iter->index<0 || pos<iter->index/2) {
729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* go forward from the start instead of backward from the current index */
730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=iter->start=iter->reservedField=0;
731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(iter->length>=0 && (iter->length-pos)<(pos-iter->index)) {
732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * if we have the UTF-16 index and length and the new position is
734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * closer to the end than the current index,
735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * then go backward from the end instead of forward from the current index
736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=iter->length;
738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->start=iter->limit;
739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->reservedField=0;
740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delta=pos-iter->index;
743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(delta==0) {
744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return iter->index; /* nothing to do */
745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* move relative to unknown UTF-16 index */
748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(delta==0) {
749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return UITER_UNKNOWN_INDEX; /* nothing to do */
750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(-delta>=iter->start) {
751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=iter->start=iter->reservedField=0;
753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0;
754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(delta>=(iter->limit-iter->start)) {
755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */
756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=iter->length; /* may or may not be <0 (unknown) */
757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->start=iter->limit;
758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->reservedField=0;
759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX;
760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* delta!=0 */
764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* move towards the requested position, pin to the edges of the string */
766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    s=(const uint8_t *)iter->context;
767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pos=iter->index; /* could be <0 (unknown) */
768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    i=iter->start;
769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(delta>0) {
770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* go forward */
771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t limit=iter->limit;
772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(iter->reservedField!=0) {
773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->reservedField=0;
774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++pos;
775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            --delta;
776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(delta>0 && i<limit) {
7788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            U8_NEXT_OR_FFFD(s, i, limit, c);
7798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if(c<=0xffff) {
780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++pos;
781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --delta;
782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(delta>=2) {
783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pos+=2;
784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                delta-=2;
785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else /* delta==1 */ {
786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* stop in the middle of a supplementary code point */
787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->reservedField=c;
788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++pos;
789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break; /* delta=0; */
790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i==limit) {
793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(iter->length<0 && iter->index>=0) {
794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->length= iter->reservedField==0 ? pos : pos+1;
795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(iter->index<0 && iter->length>=0) {
796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->index= iter->reservedField==0 ? iter->length : iter->length-1;
797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* delta<0 */ {
800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* go backward */
801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(iter->reservedField!=0) {
802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->reservedField=0;
803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            i-=4; /* we stayed behind the supplementary code point; go before it now */
804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            --pos;
805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++delta;
806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(delta<0 && i>0) {
8088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            U8_PREV_OR_FFFD(s, 0, i, c);
8098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if(c<=0xffff) {
810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --pos;
811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++delta;
812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(delta<=-2) {
813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pos-=2;
814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                delta+=2;
815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else /* delta==-1 */ {
816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* stop in the middle of a supplementary code point */
817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                i+=4; /* back to behind this supplementary code point for consistent state */
818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->reservedField=c;
819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --pos;
820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break; /* delta=0; */
821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    iter->start=i;
826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->index>=0) {
827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return iter->index=pos;
828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* we started with index<0 (unknown) so pos is bogus */
830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<=1) {
831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return iter->index=i; /* reached the beginning */
832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* we still don't know the UTF-16 index */
834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return UITER_UNKNOWN_INDEX;
835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV
840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorHasNext(UCharIterator *iter) {
841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return iter->start<iter->limit || iter->reservedField!=0;
842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV
845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorHasPrevious(UCharIterator *iter) {
846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return iter->start>0;
847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorCurrent(UCharIterator *iter) {
851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->reservedField!=0) {
852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U16_TRAIL(iter->reservedField);
853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(iter->start<iter->limit) {
854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const uint8_t *s=(const uint8_t *)iter->context;
855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar32 c;
856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t i=iter->start;
857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
8588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        U8_NEXT_OR_FFFD(s, i, iter->limit, c);
8598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if(c<=0xffff) {
860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return c;
861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return U16_LEAD(c);
863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorNext(UCharIterator *iter) {
871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t index;
872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->reservedField!=0) {
874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar trail=U16_TRAIL(iter->reservedField);
875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        iter->reservedField=0;
876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((index=iter->index)>=0) {
877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=index+1;
878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return trail;
880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(iter->start<iter->limit) {
881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const uint8_t *s=(const uint8_t *)iter->context;
882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar32 c;
883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
8848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c);
885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((index=iter->index)>=0) {
886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=++index;
887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(iter->length<0 && iter->start==iter->limit) {
888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->length= c<=0xffff ? index : index+1;
889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(iter->start==iter->limit && iter->length>=0) {
891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index= c<=0xffff ? iter->length : iter->length-1;
892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
8938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if(c<=0xffff) {
894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return c;
895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->reservedField=c;
897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return U16_LEAD(c);
898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV
905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorPrevious(UCharIterator *iter) {
906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t index;
907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->reservedField!=0) {
909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar lead=U16_LEAD(iter->reservedField);
910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        iter->reservedField=0;
911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        iter->start-=4; /* we stayed behind the supplementary code point; go before it now */
912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((index=iter->index)>0) {
913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=index-1;
914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return lead;
916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(iter->start>0) {
917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const uint8_t *s=(const uint8_t *)iter->context;
918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar32 c;
919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
9208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        U8_PREV_OR_FFFD(s, 0, iter->start, c);
921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((index=iter->index)>0) {
922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index=index-1;
923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(iter->start<=1) {
924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->index= c<=0xffff ? iter->start : iter->start+1;
925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
9268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if(c<=0xffff) {
927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return c;
928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->start+=4; /* back to behind this supplementary code point for consistent state */
930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->reservedField=c;
931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return U16_TRAIL(c);
932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_SENTINEL;
935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t U_CALLCONV
939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorGetState(const UCharIterator *iter) {
940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t state=(uint32_t)(iter->start<<1);
941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter->reservedField!=0) {
942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        state|=1;
943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return state;
945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV
948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorSetState(UCharIterator *iter,
949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     uint32_t state,
950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     UErrorCode *pErrorCode)
951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* do nothing */
954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(iter==NULL) {
955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(state==utf8IteratorGetState(iter)) {
957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* setting to the current state: no-op */
958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t index=(int32_t)(state>>1); /* UTF-8 index */
960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        state&=1; /* 1 if in surrogate pair, must be index>=4 */
961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((state==0 ? index<0 : index<4) || iter->limit<index) {
963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->start=index; /* restore UTF-8 byte index */
966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(index<=1) {
967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->index=index;
968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->index=-1; /* unknown UTF-16 index */
970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(state==0) {
972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->reservedField=0;
973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* verified index>=4 above */
975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UChar32 c;
9768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c);
977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(c<=0xffff) {
978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    iter->reservedField=c;
981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator utf8Iterator={
988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0, 0, 0, 0, 0, 0,
989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8IteratorGetIndex,
990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8IteratorMove,
991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8IteratorHasNext,
992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8IteratorHasPrevious,
993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8IteratorCurrent,
994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8IteratorNext,
995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8IteratorPrevious,
996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8IteratorGetState,
998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8IteratorSetState
999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) {
1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter!=0) {
1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(s!=0 && length>=-1) {
1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *iter=utf8Iterator;
1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->context=s;
1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(length>=0) {
1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->limit=length;
1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->limit=(int32_t)uprv_strlen(s);
1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->length= iter->limit<=1 ? iter->limit : -1;
1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *iter=noopIterator;
1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Helper functions --------------------------------------------------------- */
1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2
1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_current32(UCharIterator *iter) {
1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c, c2;
1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=iter->current(iter);
1026103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U16_IS_SURROGATE(c)) {
1027103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(U16_IS_SURROGATE_LEAD(c)) {
1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * go to the next code unit
1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * we know that we are not at the limit because c!=U_SENTINEL
1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->move(iter, 1, UITER_CURRENT);
1033103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if(U16_IS_TRAIL(c2=iter->current(iter))) {
1034103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                c=U16_GET_SUPPLEMENTARY(c, c2);
1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* undo index movement */
1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->move(iter, -1, UITER_CURRENT);
1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1040103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if(U16_IS_LEAD(c2=iter->previous(iter))) {
1041103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                c=U16_GET_SUPPLEMENTARY(c2, c);
1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c2>=0) {
1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* undo index movement */
1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                iter->move(iter, 1, UITER_CURRENT);
1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return c;
1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2
1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_next32(UCharIterator *iter) {
1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c, c2;
1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=iter->next(iter);
1057103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U16_IS_LEAD(c)) {
1058103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(U16_IS_TRAIL(c2=iter->next(iter))) {
1059103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            c=U16_GET_SUPPLEMENTARY(c, c2);
1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(c2>=0) {
1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* unmatched first surrogate, undo index movement */
1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->move(iter, -1, UITER_CURRENT);
1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return c;
1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2
1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_previous32(UCharIterator *iter) {
1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c, c2;
1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=iter->previous(iter);
1073103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(U16_IS_TRAIL(c)) {
1074103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(U16_IS_LEAD(c2=iter->previous(iter))) {
1075103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            c=U16_GET_SUPPLEMENTARY(c2, c);
1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(c2>=0) {
1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* unmatched second surrogate, undo index movement */
1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            iter->move(iter, 1, UITER_CURRENT);
1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return c;
1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2
1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_getState(const UCharIterator *iter) {
1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(iter==NULL || iter->getState==NULL) {
1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return UITER_NO_STATE;
1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return iter->getState(iter);
1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
1094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* do nothing */
1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(iter==NULL) {
1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(iter->setState==NULL) {
1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_UNSUPPORTED_ERROR;
1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        iter->setState(iter, state, pErrorCode);
1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END
1107