1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Copyright (C) 2002-2012, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: uiter.cpp 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2002jan18 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ustring.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/chariter.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/rep.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uiter.h" 22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf.h" 23103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf8.h" 24103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_USE 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define IS_EVEN(n) (((n)&1)==0) 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define IS_POINTER_EVEN(p) IS_EVEN((size_t)p) 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* No-Op UCharIterator implementation for illegal input --------------------- */ 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopGetIndex(UCharIterator * /*iter*/, UCharIteratorOrigin /*origin*/) { 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopMove(UCharIterator * /*iter*/, int32_t /*delta*/, UCharIteratorOrigin /*origin*/) { 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopHasNext(UCharIterator * /*iter*/) { 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopCurrent(UCharIterator * /*iter*/) { 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t U_CALLCONV 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopGetState(const UCharIterator * /*iter*/) { 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UITER_NO_STATE; 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) { 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_UNSUPPORTED_ERROR; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator noopIterator={ 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 0, 0, 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noopGetIndex, 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noopMove, 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noopHasNext, 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noopHasNext, 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noopCurrent, 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noopCurrent, 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noopCurrent, 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noopGetState, 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noopSetState 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UCharIterator implementation for simple strings -------------------------- */ 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is an implementation of a code unit (UChar) iterator 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for UChar * strings. 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UCharIterator.context field holds a pointer to the string. 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(origin) { 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_ZERO: 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_START: 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->start; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_CURRENT: 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->index; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LIMIT: 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->limit; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LENGTH: 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->length; 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not a valid origin */ 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Should never get here! */ 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t pos; 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(origin) { 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_ZERO: 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=delta; 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_START: 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=iter->start+delta; 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_CURRENT: 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=iter->index+delta; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LIMIT: 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=iter->limit+delta; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LENGTH: 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=iter->length+delta; 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; /* Error */ 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pos<iter->start) { 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=iter->start; 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(pos>iter->limit) { 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=iter->limit; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->index=pos; 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorHasNext(UCharIterator *iter) { 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->index<iter->limit; 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorHasPrevious(UCharIterator *iter) { 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->index>iter->start; 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorCurrent(UCharIterator *iter) { 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index<iter->limit) { 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((const UChar *)(iter->context))[iter->index]; 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorNext(UCharIterator *iter) { 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index<iter->limit) { 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((const UChar *)(iter->context))[iter->index++]; 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorPrevious(UCharIterator *iter) { 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index>iter->start) { 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((const UChar *)(iter->context))[--iter->index]; 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t U_CALLCONV 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorGetState(const UCharIterator *iter) { 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (uint32_t)iter->index; 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustringIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do nothing */ 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter==NULL) { 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((int32_t)state<iter->start || iter->limit<(int32_t)state) { 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=(int32_t)state; 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator stringIterator={ 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 0, 0, 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorGetIndex, 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorMove, 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorHasNext, 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorHasPrevious, 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorCurrent, 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorNext, 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorPrevious, 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorGetState, 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorSetState 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setString(UCharIterator *iter, const UChar *s, int32_t length) { 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter!=0) { 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s!=0 && length>=-1) { 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *iter=stringIterator; 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->context=s; 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>=0) { 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->length=length; 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->length=u_strlen(s); 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->limit=iter->length; 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *iter=noopIterator; 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UCharIterator implementation for UTF-16BE strings ------------------------ */ 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is an implementation of a code unit (UChar) iterator 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for UTF-16BE strings, i.e., strings in byte-vectors where 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * each UChar is stored as a big-endian pair of bytes. 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UCharIterator.context field holds a pointer to the string. 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Everything works just like with a normal UChar iterator (uiter_setString), 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * except that UChars are assembled from byte pairs. 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* internal helper function */ 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic inline UChar32 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf16BEIteratorGet(UCharIterator *iter, int32_t index) { 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *p=(const uint8_t *)iter->context; 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1]; 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf16BEIteratorCurrent(UCharIterator *iter) { 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t index; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((index=iter->index)<iter->limit) { 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return utf16BEIteratorGet(iter, index); 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf16BEIteratorNext(UCharIterator *iter) { 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t index; 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((index=iter->index)<iter->limit) { 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=index+1; 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return utf16BEIteratorGet(iter, index); 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf16BEIteratorPrevious(UCharIterator *iter) { 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t index; 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((index=iter->index)>iter->start) { 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=--index; 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return utf16BEIteratorGet(iter, index); 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator utf16BEIterator={ 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 0, 0, 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorGetIndex, 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorMove, 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorHasNext, 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorHasPrevious, 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf16BEIteratorCurrent, 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf16BEIteratorNext, 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf16BEIteratorPrevious, 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorGetState, 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorSetState 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL, 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * i.e., before a pair of 0 bytes where the first 0 byte is at an even 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offset from s. 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf16BE_strlen(const char *s) { 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(IS_POINTER_EVEN(s)) { 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * even-aligned, call u_strlen(s) 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we are probably on a little-endian machine, but searching for UChar NUL 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * does not care about endianness 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return u_strlen((const UChar *)s); 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* odd-aligned, search for pair of 0 bytes */ 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *p=s; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(!(*p==0 && p[1]==0)) { 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p+=2; 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (int32_t)((p-s)/2); 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length) { 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter!=NULL) { 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allow only even-length strings (the input length counts bytes) */ 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s!=NULL && (length==-1 || (length>=0 && IS_EVEN(length)))) { 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */ 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length>>=1; 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s)) { 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */ 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uiter_setString(iter, (const UChar *)s, length); 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *iter=utf16BEIterator; 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->context=s; 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>=0) { 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->length=length; 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->length=utf16BE_strlen(s); 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->limit=iter->length; 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *iter=noopIterator; 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UCharIterator wrapper around CharacterIterator --------------------------- */ 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is wrapper code around a C++ CharacterIterator to 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * look like a C UCharIterator. 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UCharIterator.context field holds a pointer to the CharacterIterator. 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(origin) { 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_ZERO: 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_START: 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->startIndex(); 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_CURRENT: 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->getIndex(); 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LIMIT: 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->endIndex(); 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LENGTH: 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->getLength(); 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not a valid origin */ 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Should never get here! */ 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(origin) { 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_ZERO: 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((CharacterIterator *)(iter->context))->setIndex(delta); 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->getIndex(); 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_START: 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_CURRENT: 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LIMIT: 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->move(delta, (CharacterIterator::EOrigin)origin); 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LENGTH: 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((CharacterIterator *)(iter->context))->setIndex(((CharacterIterator *)(iter->context))->getLength()+delta); 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->getIndex(); 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not a valid origin */ 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Should never get here! */ 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorHasNext(UCharIterator *iter) { 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->hasNext(); 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorHasPrevious(UCharIterator *iter) { 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->hasPrevious(); 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorCurrent(UCharIterator *iter) { 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((CharacterIterator *)(iter->context))->current(); 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0xffff || ((CharacterIterator *)(iter->context))->hasNext()) { 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorNext(UCharIterator *iter) { 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(((CharacterIterator *)(iter->context))->hasNext()) { 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->nextPostInc(); 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorPrevious(UCharIterator *iter) { 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(((CharacterIterator *)(iter->context))->hasPrevious()) { 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->previous(); 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t U_CALLCONV 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorGetState(const UCharIterator *iter) { 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((CharacterIterator *)(iter->context))->getIndex(); 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucharacterIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do nothing */ 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter==NULL || iter->context==NULL) { 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if((int32_t)state<((CharacterIterator *)(iter->context))->startIndex() || ((CharacterIterator *)(iter->context))->endIndex()<(int32_t)state) { 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((CharacterIterator *)(iter->context))->setIndex((int32_t)state); 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator characterIteratorWrapper={ 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 0, 0, 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru characterIteratorGetIndex, 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru characterIteratorMove, 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru characterIteratorHasNext, 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru characterIteratorHasPrevious, 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru characterIteratorCurrent, 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru characterIteratorNext, 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru characterIteratorPrevious, 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru characterIteratorGetState, 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru characterIteratorSetState 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) { 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter!=0) { 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(charIter!=0) { 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *iter=characterIteratorWrapper; 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->context=charIter; 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *iter=noopIterator; 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UCharIterator wrapper around Replaceable --------------------------------- */ 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is an implementation of a code unit (UChar) iterator 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * based on a Replaceable object. 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UCharIterator.context field holds a pointer to the Replaceable. 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UCharIterator.length and UCharIterator.index hold Replaceable.length() 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and the iteration index. 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerureplaceableIteratorCurrent(UCharIterator *iter) { 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index<iter->limit) { 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((Replaceable *)(iter->context))->charAt(iter->index); 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerureplaceableIteratorNext(UCharIterator *iter) { 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index<iter->limit) { 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((Replaceable *)(iter->context))->charAt(iter->index++); 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerureplaceableIteratorPrevious(UCharIterator *iter) { 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index>iter->start) { 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((Replaceable *)(iter->context))->charAt(--iter->index); 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator replaceableIterator={ 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 0, 0, 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorGetIndex, 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorMove, 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorHasNext, 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorHasPrevious, 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru replaceableIteratorCurrent, 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru replaceableIteratorNext, 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru replaceableIteratorPrevious, 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorGetState, 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stringIteratorSetState 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) { 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter!=0) { 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(rep!=0) { 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *iter=replaceableIterator; 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->context=rep; 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->limit=iter->length=rep->length(); 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *iter=noopIterator; 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UCharIterator implementation for UTF-8 strings --------------------------- */ 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Possible, probably necessary only for an implementation for arbitrary 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * converters: 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text. 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This would require to turn reservedFn into a close function and 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to introduce a uiter_close(iter). 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UITER_CNV_CAPACITY 16 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Minimal implementation: 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Maintain a single-UChar buffer for an additional surrogate. 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The caller must not modify start and limit because they are used internally. 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Use UCharIterator fields as follows: 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * context pointer to UTF-8 string 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * length UTF-16 length of the string; -1 until lazy evaluation 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * start current UTF-8 index 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * index current UTF-16 index; may be -1="unknown" after setState() 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * limit UTF-8 length of the string 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reservedField supplementary code point 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Since UCharIterator delivers 16-bit code units, the iteration can be 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * currently in the middle of the byte sequence for a supplementary code point. 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In this case, reservedField will contain that code point and start will 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * point to after the corresponding byte sequence. The UTF-16 index will be 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * one less than what it would otherwise be corresponding to the UTF-8 index. 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Otherwise, reservedField will be 0. 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings: 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Add implementations that do not call strlen() for iteration but check for NUL. 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(origin) { 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_ZERO: 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_START: 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_CURRENT: 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index<0) { 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the current UTF-16 index is unknown after setState(), count from the beginning */ 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *s; 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, limit, index; 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=(const uint8_t *)iter->context; 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=index=0; 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=iter->start; /* count up to the UTF-8 index */ 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(i<limit) { 6038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_NEXT_OR_FFFD(s, i, limit, c); 6048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius index+=U16_LENGTH(c); 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->start=i; /* just in case setState() did not get us to a code point boundary */ 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i==iter->limit) { 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->length=index; /* in case it was <0 or wrong */ 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->reservedField!=0) { 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --index; /* we are in the middle of a supplementary code point */ 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=index; 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->index; 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LIMIT: 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LENGTH: 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->length<0) { 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *s; 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, limit, length; 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=(const uint8_t *)iter->context; 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index<0) { 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the current UTF-16 index is unknown after setState(), 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we must first count from the beginning to here 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=length=0; 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=iter->start; 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* count from the beginning to the current index */ 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(i<limit) { 6358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_NEXT_OR_FFFD(s, i, limit, c); 6368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius length+=U16_LENGTH(c); 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* assume i==limit==iter->start, set the UTF-16 index */ 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->start=i; /* just in case setState() did not get us to a code point boundary */ 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index= iter->reservedField!=0 ? length-1 : length; 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=iter->start; 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=iter->index; 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->reservedField!=0) { 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++length; 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* count from the current index to the end */ 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=iter->limit; 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(i<limit) { 6538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_NEXT_OR_FFFD(s, i, limit, c); 6548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius length+=U16_LENGTH(c); 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->length=length; 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->length; 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not a valid origin */ 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Should never get here! */ 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *s; 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t pos; /* requested UTF-16 index */ 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; /* UTF-8 index */ 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool havePos; 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* calculate the requested UTF-16 index */ 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(origin) { 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_ZERO: 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_START: 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=delta; 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru havePos=TRUE; 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* iter->index<0 (unknown) is possible */ 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_CURRENT: 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index>=0) { 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=iter->index+delta; 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru havePos=TRUE; 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the current UTF-16 index is unknown after setState(), use only delta */ 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=0; 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru havePos=FALSE; 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LIMIT: 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UITER_LENGTH: 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->length>=0) { 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=iter->length+delta; 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru havePos=TRUE; 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* pin to the end, avoid counting the length */ 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=-1; 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->start=iter->limit; 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=0; 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(delta>=0) { 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UITER_UNKNOWN_INDEX; 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the current UTF-16 index is unknown, use only delta */ 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=0; 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru havePos=FALSE; 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; /* Error */ 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(havePos) { 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* shortcuts: pinning to the edges of the string */ 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pos<=0) { 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=iter->start=iter->reservedField=0; 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter->length>=0 && pos>=iter->length) { 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=iter->length; 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->start=iter->limit; 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=0; 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->index; 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* minimize the number of U8_NEXT/PREV operations */ 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index<0 || pos<iter->index/2) { 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* go forward from the start instead of backward from the current index */ 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=iter->start=iter->reservedField=0; 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter->length>=0 && (iter->length-pos)<(pos-iter->index)) { 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if we have the UTF-16 index and length and the new position is 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * closer to the end than the current index, 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * then go backward from the end instead of forward from the current index 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=iter->length; 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->start=iter->limit; 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=0; 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delta=pos-iter->index; 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(delta==0) { 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->index; /* nothing to do */ 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* move relative to unknown UTF-16 index */ 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(delta==0) { 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UITER_UNKNOWN_INDEX; /* nothing to do */ 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(-delta>=iter->start) { 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */ 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=iter->start=iter->reservedField=0; 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(delta>=(iter->limit-iter->start)) { 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */ 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=iter->length; /* may or may not be <0 (unknown) */ 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->start=iter->limit; 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=0; 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX; 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* delta!=0 */ 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* move towards the requested position, pin to the edges of the string */ 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=(const uint8_t *)iter->context; 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos=iter->index; /* could be <0 (unknown) */ 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=iter->start; 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(delta>0) { 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* go forward */ 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit=iter->limit; 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->reservedField!=0) { 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=0; 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++pos; 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --delta; 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(delta>0 && i<limit) { 7788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_NEXT_OR_FFFD(s, i, limit, c); 7798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(c<=0xffff) { 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++pos; 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --delta; 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(delta>=2) { 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos+=2; 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delta-=2; 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* delta==1 */ { 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* stop in the middle of a supplementary code point */ 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=c; 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++pos; 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* delta=0; */ 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i==limit) { 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->length<0 && iter->index>=0) { 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->length= iter->reservedField==0 ? pos : pos+1; 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter->index<0 && iter->length>=0) { 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index= iter->reservedField==0 ? iter->length : iter->length-1; 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* delta<0 */ { 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* go backward */ 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->reservedField!=0) { 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=0; 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i-=4; /* we stayed behind the supplementary code point; go before it now */ 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --pos; 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++delta; 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(delta<0 && i>0) { 8088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_PREV_OR_FFFD(s, 0, i, c); 8098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(c<=0xffff) { 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --pos; 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++delta; 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(delta<=-2) { 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos-=2; 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delta+=2; 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* delta==-1 */ { 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* stop in the middle of a supplementary code point */ 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i+=4; /* back to behind this supplementary code point for consistent state */ 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=c; 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --pos; 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* delta=0; */ 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->start=i; 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->index>=0) { 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->index=pos; 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we started with index<0 (unknown) so pos is bogus */ 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<=1) { 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->index=i; /* reached the beginning */ 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we still don't know the UTF-16 index */ 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UITER_UNKNOWN_INDEX; 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorHasNext(UCharIterator *iter) { 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->start<iter->limit || iter->reservedField!=0; 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorHasPrevious(UCharIterator *iter) { 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->start>0; 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorCurrent(UCharIterator *iter) { 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->reservedField!=0) { 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U16_TRAIL(iter->reservedField); 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter->start<iter->limit) { 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *s=(const uint8_t *)iter->context; 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i=iter->start; 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_NEXT_OR_FFFD(s, i, iter->limit, c); 8598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(c<=0xffff) { 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U16_LEAD(c); 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorNext(UCharIterator *iter) { 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t index; 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->reservedField!=0) { 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail=U16_TRAIL(iter->reservedField); 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=0; 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((index=iter->index)>=0) { 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=index+1; 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return trail; 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter->start<iter->limit) { 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *s=(const uint8_t *)iter->context; 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c); 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((index=iter->index)>=0) { 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=++index; 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->length<0 && iter->start==iter->limit) { 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->length= c<=0xffff ? index : index+1; 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter->start==iter->limit && iter->length>=0) { 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index= c<=0xffff ? iter->length : iter->length-1; 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 8938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(c<=0xffff) { 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=c; 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U16_LEAD(c); 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 U_CALLCONV 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorPrevious(UCharIterator *iter) { 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t index; 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->reservedField!=0) { 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar lead=U16_LEAD(iter->reservedField); 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=0; 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->start-=4; /* we stayed behind the supplementary code point; go before it now */ 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((index=iter->index)>0) { 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=index-1; 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return lead; 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter->start>0) { 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *s=(const uint8_t *)iter->context; 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 9208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_PREV_OR_FFFD(s, 0, iter->start, c); 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((index=iter->index)>0) { 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=index-1; 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter->start<=1) { 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index= c<=0xffff ? iter->start : iter->start+1; 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 9268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(c<=0xffff) { 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->start+=4; /* back to behind this supplementary code point for consistent state */ 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=c; 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U16_TRAIL(c); 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_SENTINEL; 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t U_CALLCONV 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorGetState(const UCharIterator *iter) { 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t state=(uint32_t)(iter->start<<1); 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter->reservedField!=0) { 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state|=1; 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return state; 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8IteratorSetState(UCharIterator *iter, 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t state, 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do nothing */ 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter==NULL) { 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(state==utf8IteratorGetState(iter)) { 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* setting to the current state: no-op */ 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t index=(int32_t)(state>>1); /* UTF-8 index */ 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state&=1; /* 1 if in surrogate pair, must be index>=4 */ 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((state==0 ? index<0 : index<4) || iter->limit<index) { 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->start=index; /* restore UTF-8 byte index */ 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(index<=1) { 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=index; 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->index=-1; /* unknown UTF-16 index */ 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(state==0) { 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=0; 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* verified index>=4 above */ 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 9768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c); 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<=0xffff) { 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->reservedField=c; 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UCharIterator utf8Iterator={ 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 0, 0, 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8IteratorGetIndex, 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8IteratorMove, 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8IteratorHasNext, 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8IteratorHasPrevious, 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8IteratorCurrent, 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8IteratorNext, 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8IteratorPrevious, 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8IteratorGetState, 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8IteratorSetState 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) { 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter!=0) { 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s!=0 && length>=-1) { 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *iter=utf8Iterator; 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->context=s; 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>=0) { 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->limit=length; 1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->limit=(int32_t)uprv_strlen(s); 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->length= iter->limit<=1 ? iter->limit : -1; 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *iter=noopIterator; 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Helper functions --------------------------------------------------------- */ 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_current32(UCharIterator *iter) { 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, c2; 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=iter->current(iter); 1026103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE(c)) { 1027103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE_LEAD(c)) { 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * go to the next code unit 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we know that we are not at the limit because c!=U_SENTINEL 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->move(iter, 1, UITER_CURRENT); 1033103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_TRAIL(c2=iter->current(iter))) { 1034103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius c=U16_GET_SUPPLEMENTARY(c, c2); 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* undo index movement */ 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->move(iter, -1, UITER_CURRENT); 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1040103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_LEAD(c2=iter->previous(iter))) { 1041103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius c=U16_GET_SUPPLEMENTARY(c2, c); 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c2>=0) { 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* undo index movement */ 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->move(iter, 1, UITER_CURRENT); 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_next32(UCharIterator *iter) { 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, c2; 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=iter->next(iter); 1057103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_LEAD(c)) { 1058103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_TRAIL(c2=iter->next(iter))) { 1059103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius c=U16_GET_SUPPLEMENTARY(c, c2); 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c2>=0) { 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched first surrogate, undo index movement */ 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->move(iter, -1, UITER_CURRENT); 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UChar32 U_EXPORT2 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_previous32(UCharIterator *iter) { 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, c2; 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=iter->previous(iter); 1073103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_TRAIL(c)) { 1074103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_LEAD(c2=iter->previous(iter))) { 1075103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius c=U16_GET_SUPPLEMENTARY(c2, c); 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(c2>=0) { 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched second surrogate, undo index movement */ 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->move(iter, 1, UITER_CURRENT); 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_getState(const UCharIterator *iter) { 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(iter==NULL || iter->getState==NULL) { 1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UITER_NO_STATE; 1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return iter->getState(iter); 1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2 1094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { 1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do nothing */ 1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter==NULL) { 1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(iter->setState==NULL) { 1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_UNSUPPORTED_ERROR; 1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iter->setState(iter, state, pErrorCode); 1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 1107