1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Copyright (C) 2000-2007, International Business Machines
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   file name:  ucnvlat1.cpp
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   encoding:   US-ASCII
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   tab size:   8 (not used)
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   indentation:4
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created on: 2000feb07
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created by: Markus W. Scherer
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uset.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h"
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* control optimizations according to the platform */
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define LATIN1_UNROLL_FROM_UNICODE 1
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* ISO 8859-1 --------------------------------------------------------------- */
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UErrorCode *pErrorCode) {
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source;
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *target;
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t targetCapacity, length;
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t *offsets;
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t sourceIndex;
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(const uint8_t *)pArgs->source;
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=pArgs->target;
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offsets=pArgs->offsets;
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceIndex=0;
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * for the minimum of the sourceLength and targetCapacity
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length<=targetCapacity) {
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity=length;
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* target will be full */
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length=targetCapacity;
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(targetCapacity>=8) {
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* This loop is unrolled for speed and improved pipelining. */
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t count, loops;
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        loops=count=targetCapacity>>3;
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length=targetCapacity&=0x7;
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        do {
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target[0]=source[0];
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target[1]=source[1];
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target[2]=source[2];
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target[3]=source[3];
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target[4]=source[4];
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target[5]=source[5];
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target[6]=source[6];
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target[7]=source[7];
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target+=8;
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            source+=8;
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } while(--count>0);
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(offsets!=NULL) {
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            do {
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[0]=sourceIndex++;
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[1]=sourceIndex++;
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[2]=sourceIndex++;
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[3]=sourceIndex++;
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[4]=sourceIndex++;
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[5]=sourceIndex++;
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[6]=sourceIndex++;
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[7]=sourceIndex++;
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets+=8;
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } while(--loops>0);
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion loop */
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(targetCapacity>0) {
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *target++=*source++;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        --targetCapacity;
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=(const char *)source;
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=target;
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set offsets */
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(offsets!=NULL) {
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(length>0) {
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *offsets++=sourceIndex++;
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            --length;
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pArgs->offsets=offsets;
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UErrorCode *pErrorCode) {
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source=(const uint8_t *)pArgs->source;
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(source<(const uint8_t *)pArgs->sourceLimit) {
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pArgs->source=(const char *)(source+1);
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return *source;
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* no output because of empty input */
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0xffff;
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                              UErrorCode *pErrorCode) {
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *source, *sourceLimit;
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *target, *oldTarget;
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t targetCapacity, length;
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t *offsets;
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 cp;
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar c, max;
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t sourceIndex;
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=pArgs->converter;
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=pArgs->source;
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=pArgs->sourceLimit;
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=oldTarget=(uint8_t *)pArgs->target;
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offsets=pArgs->offsets;
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cnv->sharedData==&_Latin1Data) {
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        max=0xff; /* Latin-1 */
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        max=0x7f; /* US-ASCII */
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the converter state from UConverter */
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cp=cnv->fromUChar32;
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* sourceIndex=-1 if the current character began in the previous buffer */
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceIndex= cp==0 ? 0 : -1;
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * for the minimum of the sourceLength and targetCapacity
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=(int32_t)(sourceLimit-source);
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length<targetCapacity) {
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity=length;
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion loop */
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cp!=0 && targetCapacity>0) {
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto getTrail;
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if LATIN1_UNROLL_FROM_UNICODE
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* unroll the loop with the most common case */
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(targetCapacity>=16) {
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t count, loops;
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar u, oredChars;
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        loops=count=targetCapacity>>4;
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        do {
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars=u=*source++;
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=u=*source++;
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)u;
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* were all 16 entries really valid? */
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(oredChars>max) {
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* no, return to the first of these 16 */
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                source-=16;
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                target-=16;
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } while(--count>0);
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        count=loops-count;
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity-=16*count;
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(offsets!=NULL) {
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oldTarget+=16*count;
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while(count>0) {
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *offsets++=sourceIndex++;
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --count;
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion loop */
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=0;
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(targetCapacity>0 && (c=*source++)<=max) {
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* convert the Unicode code point */
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *target++=(uint8_t)c;
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        --targetCapacity;
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c>max) {
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cp=c;
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(!U_IS_SURROGATE(cp)) {
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* callback(unassigned) */
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(U_IS_SURROGATE_LEAD(cp)) {
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail:
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(source<sourceLimit) {
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* test the following code unit */
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UChar trail=*source;
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U16_IS_TRAIL(trail)) {
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ++source;
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cp=U16_GET_SUPPLEMENTARY(cp, trail);
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* this codepage does not map supplementary code points */
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(unassigned) */
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* this is an unmatched lead code unit (1st surrogate) */
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* callback(illegal) */
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* no more input */
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->fromUChar32=cp;
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                goto noMoreInput;
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* this is an unmatched trail code unit (2nd surrogate) */
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* callback(illegal) */
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->fromUChar32=cp;
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerunoMoreInput:
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set offsets since the start */
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(offsets!=NULL) {
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        size_t count=target-oldTarget;
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(count>0) {
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *offsets++=sourceIndex++;
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            --count;
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* target is full */
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=source;
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=(char *)target;
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->offsets=offsets;
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UConverterToUnicodeArgs *pToUArgs,
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UErrorCode *pErrorCode) {
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *utf8;
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source, *sourceLimit;
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *target;
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t targetCapacity;
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t b, t1;
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8=pToUArgs->converter;
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(uint8_t *)pToUArgs->source;
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=(uint8_t *)pFromUArgs->target;
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the converter state from the UTF-8 UConverter */
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=(UChar32)utf8->toUnicodeStatus;
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c!=0 && source<sourceLimit) {
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(targetCapacity==0) {
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return;
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++source;
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=(uint8_t)(((c&3)<<6)|t1);
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            --targetCapacity;
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            utf8->toUnicodeStatus=0;
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            utf8->toULength=0;
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_USING_DEFAULT_WARNING;
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return;
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Make sure that the last byte sequence before sourceLimit is complete
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * or runs into a lead byte.
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * In the conversion loop compare source with sourceLimit only once
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * per multi-byte character.
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * For Latin-1, adjust sourceLimit only for 1 trail byte because
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the conversion loop handles at most 2-byte sequences.
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        --sourceLimit;
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion loop */
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(source<sourceLimit) {
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(targetCapacity>0) {
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=*source++;
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if((int8_t)b>=0) {
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* convert ASCII */
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)b;
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if( /* handle U+0080..U+00FF inline */
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       b>=0xc2 && b<=0xc3 &&
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       (t1=(uint8_t)(*source-0x80)) <= 0x3f
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ) {
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++source;
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *target++=(uint8_t)(((b&3)<<6)|t1);
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --targetCapacity;
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pToUArgs->source=(char *)(source-1);
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pFromUArgs->target=(char *)target;
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_USING_DEFAULT_WARNING;
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return;
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* target is full */
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The sourceLimit may have been adjusted before the conversion loop
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * to stop before a truncated sequence.
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * If so, then collect the truncated sequence now.
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * For Latin-1, there is at most exactly one lead byte because of the
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * smaller sourceLimit adjustment logic.
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        utf8->toULength=1;
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        utf8->mode=utf8_countTrailBytes[b]+1;
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pToUArgs->source=(char *)source;
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pFromUArgs->target=(char *)target;
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_Latin1GetUnicodeSet(const UConverter *cnv,
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     const USetAdder *sa,
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     UConverterUnicodeSet which,
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     UErrorCode *pErrorCode) {
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sa->addRange(sa->set, 0, 0xff);
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _Latin1Impl={
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCNV_LATIN_1,
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Latin1ToUnicodeWithOffsets,
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Latin1ToUnicodeWithOffsets,
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Latin1FromUnicodeWithOffsets,
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Latin1FromUnicodeWithOffsets,
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Latin1GetNextUChar,
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Latin1GetUnicodeSet,
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_Latin1FromUTF8
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _Latin1StaticData={
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UConverterStaticData),
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "ISO-8859-1",
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _Latin1Data={
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UConverterSharedData), ~((uint32_t) 0),
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, NULL, &_Latin1StaticData, FALSE, &_Latin1Impl,
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* US-ASCII ----------------------------------------------------------------- */
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           UErrorCode *pErrorCode) {
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source, *sourceLimit;
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *target, *oldTarget;
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t targetCapacity, length;
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t *offsets;
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t sourceIndex;
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t c;
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(const uint8_t *)pArgs->source;
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=oldTarget=pArgs->target;
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offsets=pArgs->offsets;
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* sourceIndex=-1 if the current character began in the previous buffer */
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceIndex=0;
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * for the minimum of the sourceLength and targetCapacity
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=(int32_t)(sourceLimit-source);
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length<targetCapacity) {
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity=length;
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(targetCapacity>=8) {
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* This loop is unrolled for speed and improved pipelining. */
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t count, loops;
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar oredChars;
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        loops=count=targetCapacity>>3;
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        do {
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars=target[0]=source[0];
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=target[1]=source[1];
509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=target[2]=source[2];
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=target[3]=source[3];
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=target[4]=source[4];
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=target[5]=source[5];
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=target[6]=source[6];
514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=target[7]=source[7];
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* were all 16 entries really valid? */
517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(oredChars>0x7f) {
518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* no, return to the first of these 16 */
519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            source+=8;
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target+=8;
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } while(--count>0);
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        count=loops-count;
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity-=count*8;
526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(offsets!=NULL) {
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oldTarget+=count*8;
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while(count>0) {
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[0]=sourceIndex++;
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[1]=sourceIndex++;
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[2]=sourceIndex++;
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[3]=sourceIndex++;
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[4]=sourceIndex++;
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[5]=sourceIndex++;
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[6]=sourceIndex++;
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets[7]=sourceIndex++;
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsets+=8;
539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --count;
540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion loop */
545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=0;
546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(targetCapacity>0 && (c=*source++)<=0x7f) {
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *target++=c;
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        --targetCapacity;
549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c>0x7f) {
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* callback(illegal); copy the current bytes to toUBytes[] */
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UConverter *cnv=pArgs->converter;
554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toUBytes[0]=c;
555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toULength=1;
556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(source<sourceLimit && target>=pArgs->targetLimit) {
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* target is full */
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set offsets since the start */
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(offsets!=NULL) {
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        size_t count=target-oldTarget;
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(count>0) {
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *offsets++=sourceIndex++;
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            --count;
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->source=(const char *)source;
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->target=target;
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pArgs->offsets=offsets;
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UErrorCode *pErrorCode) {
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source;
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t b;
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(const uint8_t *)pArgs->source;
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(source<(const uint8_t *)pArgs->sourceLimit) {
586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        b=*source++;
587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pArgs->source=(const char *)source;
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(b<=0x7f) {
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return b;
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UConverter *cnv=pArgs->converter;
592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->toUBytes[0]=b;
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->toULength=1;
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0xffff;
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* no output because of empty input */
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0xffff;
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* "Convert" UTF-8 to US-ASCII: Validate and copy. */
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UConverterToUnicodeArgs *pToUArgs,
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UErrorCode *pErrorCode) {
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source, *sourceLimit;
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *target;
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t targetCapacity, length;
612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t c;
614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(pToUArgs->converter->toUnicodeStatus!=0) {
616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* no handling of partial UTF-8 characters here, fall back to pivoting */
617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_USING_DEFAULT_WARNING;
618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(const uint8_t *)pToUArgs->source;
623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=(uint8_t *)pFromUArgs->target;
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * for the minimum of the sourceLength and targetCapacity
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=(int32_t)(sourceLimit-source);
632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length<targetCapacity) {
633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity=length;
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* unroll the loop with the most common case */
637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(targetCapacity>=16) {
638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t count, loops;
639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uint8_t oredChars;
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        loops=count=targetCapacity>>4;
642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        do {
643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars=*target++=*source++;
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oredChars|=*target++=*source++;
659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* were all 16 entries really valid? */
661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(oredChars>0x7f) {
662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* no, return to the first of these 16 */
663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                source-=16;
664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                target-=16;
665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } while(--count>0);
668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        count=loops-count;
669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetCapacity-=16*count;
670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion loop */
673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=0;
674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(targetCapacity>0 && (c=*source)<=0x7f) {
675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++source;
676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *target++=c;
677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        --targetCapacity;
678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c>0x7f) {
681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* non-ASCII character, handle in standard converter */
682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_USING_DEFAULT_WARNING;
683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* target is full */
685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pToUArgs->source=(const char *)source;
690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pFromUArgs->target=(char *)target;
691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ASCIIGetUnicodeSet(const UConverter *cnv,
695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    const USetAdder *sa,
696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UConverterUnicodeSet which,
697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UErrorCode *pErrorCode) {
698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sa->addRange(sa->set, 0, 0x7f);
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _ASCIIImpl={
702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCNV_US_ASCII,
703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _ASCIIToUnicodeWithOffsets,
712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _ASCIIToUnicodeWithOffsets,
713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Latin1FromUnicodeWithOffsets,
714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _Latin1FromUnicodeWithOffsets,
715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _ASCIIGetNextUChar,
716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _ASCIIGetUnicodeSet,
722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_ASCIIFromUTF8
725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _ASCIIStaticData={
728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UConverterStaticData),
729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "US-ASCII",
730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _ASCIIData={
738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UConverterSharedData), ~((uint32_t) 0),
739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl,
740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
744