1/*
2**********************************************************************
3*   Copyright (C) 2002-2011, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   file name:  ucnv_u7.c
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2002jul01
12*   created by: Markus W. Scherer
13*
14*   UTF-7 converter implementation. Used to be in ucnv_utf.c.
15*/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
20
21#include "unicode/ucnv.h"
22#include "ucnv_bld.h"
23#include "ucnv_cnv.h"
24#include "uassert.h"
25
26/* UTF-7 -------------------------------------------------------------------- */
27
28/*
29 * UTF-7 is a stateful encoding of Unicode.
30 * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt)
31 * It was intended for use in Internet email systems, using in its bytewise
32 * encoding only a subset of 7-bit US-ASCII.
33 * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still
34 * occasionally used.
35 *
 * When converting Unicode to UTF-7, the RFC allows some US-ASCII characters
 * to be encoded either directly or in base64. In particular, the characters
 * in set O as defined in the RFC (see below) may be encoded directly, but
 * they are not allowed in, e.g., email headers.
40 * By default, the ICU UTF-7 converter encodes set O directly.
41 * By choosing the option "version=1", set O will be escaped instead.
42 * For example:
43 *     utf7Converter=ucnv_open("UTF-7,version=1");
44 *
45 * For details about email headers see RFC 2047.
46 */
47
48/*
49 * Tests for US-ASCII characters belonging to character classes
50 * defined in UTF-7.
51 *
52 * Set D (directly encoded characters) consists of the following
53 * characters: the upper and lower case letters A through Z
54 * and a through z, the 10 digits 0-9, and the following nine special
55 * characters (note that "+" and "=" are omitted):
56 *     '(),-./:?
57 *
58 * Set O (optional direct characters) consists of the following
59 * characters (note that "\" and "~" are omitted):
60 *     !"#$%&*;<=>@[]^_`{|}
61 *
62 * According to the rules in RFC 2152, the byte values for the following
63 * US-ASCII characters are not used in UTF-7 and are therefore illegal:
64 * - all C0 control codes except for CR LF TAB
65 * - BACKSLASH
66 * - TILDE
67 * - DEL
68 * - all codes beyond US-ASCII, i.e. all >127
69 */
/*
 * Nonzero if c is a member of RFC 2152 set D (directly encoded characters).
 * Each (uint8_t)((c)-first)<count term tests one contiguous run of codes with
 * a single comparison. The argument is evaluated more than once.
 */
#define inSetD(c) \
    ( \
        (uint8_t)((c)-0x61)<26 ||   /* a-z */ \
        (uint8_t)((c)-0x41)<26 ||   /* A-Z */ \
        (uint8_t)((c)-0x30)<10 ||   /* 0-9 */ \
        (uint8_t)((c)-0x27)<3 ||    /* apostrophe, left and right parenthesis */ \
        (uint8_t)((c)-0x2c)<4 ||    /* comma, minus, period, slash */ \
        (c)==0x3a ||                /* colon */ \
        (c)==0x3f                   /* question mark */ \
    )
77
/*
 * Nonzero if c is a member of RFC 2152 set O (optional direct characters).
 * Same range-test technique as for set D; the argument is evaluated more
 * than once.
 */
#define inSetO(c) \
    ( \
        (uint8_t)((c)-0x21)<6 ||    /* exclamation mark .. ampersand */ \
        (uint8_t)((c)-0x3b)<4 ||    /* semicolon .. greater-than */ \
        (uint8_t)((c)-0x5d)<4 ||    /* right bracket .. grave accent */ \
        (uint8_t)((c)-0x7b)<3 ||    /* left brace .. right brace */ \
        (c)==0x2a ||                /* asterisk */ \
        (c)==0x40 ||                /* commercial at */ \
        (c)==0x5b                   /* left bracket */ \
    )
85
/* the three C0 controls that UTF-7 permits: CR, LF, TAB */
#define isCRLFTAB(c) ((c)==0x0d || (c)==0x0a || (c)==0x09)
/* the same three controls plus SPACE */
#define isCRLFSPTAB(c) ((c)==0x20 || (c)==0x0d || (c)==0x0a || (c)==0x09)

/* US-ASCII codes of the characters with a special role in UTF-7 */
#define PLUS  0x2b
#define MINUS 0x2d
#define BACKSLASH 0x5c
#define TILDE 0x7e

/*
 * Legal byte values: the 94 codes from SPACE (0x20) up to, but not including,
 * TILDE (0x7e), except for BACKSLASH; plus CR, LF and TAB.
 */
#define isLegalUTF7(c) (((uint8_t)((c)-0x20)<94 && (c)!=BACKSLASH) || isCRLFTAB(c))
96
97/* encode directly sets D and O and CR LF SP TAB */
98static const UBool encodeDirectlyMaximum[128]={
99 /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
100    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
101    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
102
103    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
104    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105
106    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
108
109    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0
111};
112
113/* encode directly set D and CR LF SP TAB but not set O */
114static const UBool encodeDirectlyRestricted[128]={
115 /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
116    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
117    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
118
119    1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
120    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
121
122    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
123    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
124
125    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
126    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
127};
128
/*
 * Base64 alphabet as US-ASCII codes: maps a 6-bit value 0..63 to the byte
 * that represents it (A-Z, a-z, 0-9, plus sign, slash).
 */
static const uint8_t
toBase64[64]={
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,     /*  0.. 7: A-H */
    0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,     /*  8..15: I-P */
    0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,     /* 16..23: Q-X */
    0x59, 0x5a, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,     /* 24..31: Y Z a-f */
    0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e,     /* 32..39: g-n */
    0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,     /* 40..47: o-v */
    0x77, 0x78, 0x79, 0x7a, 0x30, 0x31, 0x32, 0x33,     /* 48..55: w-z 0-3 */
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2b, 0x2f      /* 56..63: 4-9, plus, slash */
};
142
/*
 * Classification of US-ASCII bytes for the base64 decoder, indexed by 0..0x7f:
 *   0..63  the 6-bit value of a base64 character
 *   -1     a legal character that is not part of the base64 alphabet
 *   -2     the minus sign
 *   -3     a byte that is illegal in UTF-7
 * Each row below covers eight consecutive codes.
 */
static const int8_t
fromBase64[128]={
    -3, -3, -3, -3, -3, -3, -3, -3,     /* 00..07 */
    -3, -1, -1, -3, -3, -1, -3, -3,     /* 08..0f: TAB (09), LF (0a), CR (0d) are legal */
    -3, -3, -3, -3, -3, -3, -3, -3,     /* 10..17 */
    -3, -3, -3, -3, -3, -3, -3, -3,     /* 18..1f */

    -1, -1, -1, -1, -1, -1, -1, -1,     /* 20..27 */
    -1, -1, -1, 62, -1, -2, -1, 63,     /* 28..2f: plus (2b), minus (2d), slash (2f) */
    52, 53, 54, 55, 56, 57, 58, 59,     /* 30..37: digits 0-7 */
    60, 61, -1, -1, -1, -1, -1, -1,     /* 38..3f: digits 8, 9 */

    -1,  0,  1,  2,  3,  4,  5,  6,     /* 40..47: A-G */
     7,  8,  9, 10, 11, 12, 13, 14,     /* 48..4f: H-O */
    15, 16, 17, 18, 19, 20, 21, 22,     /* 50..57: P-W */
    23, 24, 25, -1, -3, -1, -1, -1,     /* 58..5f: X-Z; backslash (5c) is illegal */

    -1, 26, 27, 28, 29, 30, 31, 32,     /* 60..67: a-g */
    33, 34, 35, 36, 37, 38, 39, 40,     /* 68..6f: h-o */
    41, 42, 43, 44, 45, 46, 47, 48,     /* 70..77: p-w */
    49, 50, 51, -1, -1, -1, -3, -3      /* 78..7f: x-z; tilde (7e) and DEL (7f) are illegal */
};
162
163/*
164 * converter status values:
165 *
166 * toUnicodeStatus:
167 *     24 inDirectMode (boolean)
168 * 23..16 base64Counter (-1..7)
169 * 15..0  bits (up to 14 bits incoming base64)
170 *
171 * fromUnicodeStatus:
172 * 31..28 version (0: set O direct  1: set O escaped)
173 *     24 inDirectMode (boolean)
174 * 23..16 base64Counter (0..2)
175 *  7..0  bits (6 bits outgoing base64)
176 *
177 */
178
179static void
180_UTF7Reset(UConverter *cnv, UConverterResetChoice choice) {
181    if(choice<=UCNV_RESET_TO_UNICODE) {
182        /* reset toUnicode */
183        cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
184        cnv->toULength=0;
185    }
186    if(choice!=UCNV_RESET_TO_UNICODE) {
187        /* reset fromUnicode */
188        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
189    }
190}
191
192static void
193_UTF7Open(UConverter *cnv,
194          UConverterLoadArgs *pArgs,
195          UErrorCode *pErrorCode) {
196    if(UCNV_GET_VERSION(cnv)<=1) {
197        /* TODO(markus): Should just use cnv->options rather than copying the version number. */
198        cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28;
199        _UTF7Reset(cnv, UCNV_RESET_BOTH);
200    } else {
201        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
202    }
203}
204
/*
 * Converts UTF-7 bytes from pArgs->source into UTF-16 code units at
 * pArgs->target, optionally recording in pArgs->offsets the source index
 * that each output unit came from.
 *
 * The function is a two-state machine (direct mode and Unicode mode).
 * Its state is unpacked from cnv->toUnicodeStatus on entry and packed back
 * on exit, so that a conversion can continue across calls with separate
 * input buffers. cnv->toUBytes/toULength hold the bytes of the sequence
 * that is currently being assembled; when an error is set they contain the
 * offending byte sequence.
 *
 * Results:
 * - pArgs->source, pArgs->target and pArgs->offsets are advanced.
 * - *pErrorCode is set to U_ILLEGAL_CHAR_FOUND for an illegal byte or an
 *   incomplete base64 sequence, and to U_BUFFER_OVERFLOW_ERROR when input
 *   remains but the target is full.
 */
static void
_UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                          UErrorCode *pErrorCode) {
    UConverter *cnv;
    const uint8_t *source, *sourceLimit;
    UChar *target;
    const UChar *targetLimit;
    int32_t *offsets;

    uint8_t *bytes;
    uint8_t byteIndex;

    int32_t length, targetCapacity;

    /* UTF-7 state */
    uint16_t bits;
    int8_t base64Counter;
    UBool inDirectMode;

    int8_t base64Value;

    int32_t sourceIndex, nextSourceIndex;

    uint8_t b;
    /* set up the local pointers */
    cnv=pArgs->converter;

    source=(const uint8_t *)pArgs->source;
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    target=pArgs->target;
    targetLimit=pArgs->targetLimit;
    offsets=pArgs->offsets;
    /* get the state machine state */
    {
        /* layout: bit 24 = inDirectMode, bits 23..16 = base64Counter, bits 15..0 = bits */
        uint32_t status=cnv->toUnicodeStatus;
        inDirectMode=(UBool)((status>>24)&1);
        base64Counter=(int8_t)(status>>16);
        bits=(uint16_t)status;
    }
    bytes=cnv->toUBytes;
    byteIndex=cnv->toULength;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex=byteIndex==0 ? 0 : -1;
    nextSourceIndex=0;

    if(inDirectMode) {
directMode:
        /*
         * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
         * with their US-ASCII byte values.
         * Backslash and Tilde and most control characters are not allowed in UTF-7.
         * A plus sign starts Unicode (or "escape") Mode.
         *
         * In Direct Mode, only the sourceIndex is used.
         */
        byteIndex=0;
        /*
         * Each byte handled by this loop produces at most one UChar, so the
         * loop count is the smaller of the remaining input and the remaining
         * output capacity; no per-byte target check is needed.
         */
        length=(int32_t)(sourceLimit-source);
        targetCapacity=(int32_t)(targetLimit-target);
        if(length>targetCapacity) {
            length=targetCapacity;
        }
        while(length>0) {
            b=*source++;
            if(!isLegalUTF7(b)) {
                /* illegal */
                bytes[0]=b;
                byteIndex=1;
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                break;
            } else if(b!=PLUS) {
                /* write directly encoded character */
                *target++=b;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex++;
                }
            } else /* PLUS */ {
                /* switch to Unicode mode */
                nextSourceIndex=++sourceIndex;
                inDirectMode=FALSE;
                byteIndex=0;
                bits=0;
                /* -1 marks "no base64 character seen yet after the plus sign" */
                base64Counter=-1;
                goto unicodeMode;
            }
            --length;
        }
        if(source<sourceLimit && target>=targetLimit) {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
    } else {
unicodeMode:
        /*
         * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
         * The base64 sequence ends with any character that is not in the base64 alphabet.
         * A terminating minus sign is consumed.
         *
         * In Unicode Mode, the sourceIndex has the index to the start of the current
         * base64 bytes, while nextSourceIndex is precisely parallel to source,
         * keeping the index to the following byte.
         * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
         */
        while(source<sourceLimit) {
            if(target<targetLimit) {
                /* remember every consumed byte so that it can be reported if the sequence is bad */
                bytes[byteIndex++]=b=*source++;
                ++nextSourceIndex;
                base64Value = -3; /* initialize as illegal */
                /* b>=126 is tested first, so fromBase64[] is only indexed with values below 126 */
                if(b>=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) {
                    /* either
                     * base64Value==-1 for any legal character except base64 and minus sign, or
                     * base64Value==-3 for illegal characters:
                     * 1. In either case, leave Unicode mode.
                     * 2.1. If we ended with an incomplete UChar or none after the +, then
                     *      generate an error for the preceding erroneous sequence and deal with
                     *      the current (possibly illegal) character next time through.
                     * 2.2. Else the current char comes after a complete UChar, which was already
                     *      pushed to the output buf, so:
                     * 2.2.1. If the current char is legal, just save it for processing next time.
                     *        It may be for example, a plus which we need to deal with in direct mode.
                     * 2.2.2. Else if the current char is illegal, we might as well deal with it here.
                     */
                    inDirectMode=TRUE;
                    if(base64Counter==-1) {
                        /* illegal: + immediately followed by something other than base64 or minus sign */
                        /* include the plus sign in the reported sequence, but not the subsequent char */
                        --source;
                        bytes[0]=PLUS;
                        byteIndex=1;
                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                        break;
                    } else if(bits!=0) {
                        /* bits are illegally left over, a UChar is incomplete */
                        /* don't include current char (legal or illegal) in error seq */
                        --source;
                        --byteIndex;
                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                        break;
                    } else {
                        /* previous UChar was complete */
                        if(base64Value==-3) {
                            /* current character is illegal, deal with it here */
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                            break;
                        } else {
                            /* un-read the current character in case it is a plus sign */
                            --source;
                            sourceIndex=nextSourceIndex-1;
                            goto directMode;
                        }
                    }
                } else if(base64Value>=0) {
                    /* collect base64 bytes into UChars */
                    /*
                     * base64Counter is the position within a group of eight
                     * base64 characters (8*6 bits = three 16-bit UChars).
                     * A UChar is completed at positions 2, 5 and 7; at 2 and 5
                     * the low bits of the current base64 value are carried over
                     * in bits as the start of the next UChar.
                     */
                    switch(base64Counter) {
                    case -1: /* -1 is immediately after the + */
                    case 0:
                        bits=base64Value;
                        base64Counter=1;
                        break;
                    case 1:
                    case 3:
                    case 4:
                    case 6:
                        bits=(uint16_t)((bits<<6)|base64Value);
                        ++base64Counter;
                        break;
                    case 2:
                        /* 12 collected bits plus the top 4 of this value; 2 bits carry over */
                        *target++=(UChar)((bits<<4)|(base64Value>>2));
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            sourceIndex=nextSourceIndex-1;
                        }
                        bytes[0]=b; /* keep this byte in case an error occurs */
                        byteIndex=1;
                        bits=(uint16_t)(base64Value&3);
                        base64Counter=3;
                        break;
                    case 5:
                        /* 14 collected bits plus the top 2 of this value; 4 bits carry over */
                        *target++=(UChar)((bits<<2)|(base64Value>>4));
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            sourceIndex=nextSourceIndex-1;
                        }
                        bytes[0]=b; /* keep this byte in case an error occurs */
                        byteIndex=1;
                        bits=(uint16_t)(base64Value&15);
                        base64Counter=6;
                        break;
                    case 7:
                        /* 10 collected bits plus all 6 of this value; the group is complete */
                        *target++=(UChar)((bits<<6)|base64Value);
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            sourceIndex=nextSourceIndex;
                        }
                        byteIndex=0;
                        bits=0;
                        base64Counter=0;
                        break;
                    default:
                        /* will never occur */
                        break;
                    }
                } else /*base64Value==-2*/ {
                    /* minus sign terminates the base64 sequence */
                    inDirectMode=TRUE;
                    if(base64Counter==-1) {
                        /* +- i.e. a minus immediately following a plus */
                        *target++=PLUS;
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex-1;
                        }
                    } else {
                        /* absorb the minus and leave the Unicode Mode */
                        if(bits!=0) {
                            /* bits are illegally left over, a UChar is incomplete */
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                            break;
                        }
                    }
                    sourceIndex=nextSourceIndex;
                    goto directMode;
                }
            } else {
                /* target is full */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
        }
    }

    if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {
        /*
         * if we are in Unicode mode, then the byteIndex might not be 0,
         * but that is ok if bits==0
         * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
         * (not true for IMAP-mailbox-name where we must end in direct mode)
         */
        byteIndex=0;
    }

    /* set the converter state back into UConverter */
    /* base64Counter may be -1; the uint8_t cast keeps it from spilling into the higher status bits */
    cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
    cnv->toULength=byteIndex;

    /* write back the updated pointers */
    pArgs->source=(const char *)source;
    pArgs->target=target;
    pArgs->offsets=offsets;
    return;
}
455
456static void
457_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
458                            UErrorCode *pErrorCode) {
459    UConverter *cnv;
460    const UChar *source, *sourceLimit;
461    uint8_t *target, *targetLimit;
462    int32_t *offsets;
463
464    int32_t length, targetCapacity, sourceIndex;
465    UChar c;
466
467    /* UTF-7 state */
468    const UBool *encodeDirectly;
469    uint8_t bits;
470    int8_t base64Counter;
471    UBool inDirectMode;
472
473    /* set up the local pointers */
474    cnv=pArgs->converter;
475
476    /* set up the local pointers */
477    source=pArgs->source;
478    sourceLimit=pArgs->sourceLimit;
479    target=(uint8_t *)pArgs->target;
480    targetLimit=(uint8_t *)pArgs->targetLimit;
481    offsets=pArgs->offsets;
482
483    /* get the state machine state */
484    {
485        uint32_t status=cnv->fromUnicodeStatus;
486        encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted;
487        inDirectMode=(UBool)((status>>24)&1);
488        base64Counter=(int8_t)(status>>16);
489        bits=(uint8_t)status;
490        U_ASSERT(bits<=sizeof(toBase64)/sizeof(toBase64[0]));
491    }
492
493    /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
494    sourceIndex=0;
495
496    if(inDirectMode) {
497directMode:
498        length=(int32_t)(sourceLimit-source);
499        targetCapacity=(int32_t)(targetLimit-target);
500        if(length>targetCapacity) {
501            length=targetCapacity;
502        }
503        while(length>0) {
504            c=*source++;
505            /* currently always encode CR LF SP TAB directly */
506            if(c<=127 && encodeDirectly[c]) {
507                /* encode directly */
508                *target++=(uint8_t)c;
509                if(offsets!=NULL) {
510                    *offsets++=sourceIndex++;
511                }
512            } else if(c==PLUS) {
513                /* output +- for + */
514                *target++=PLUS;
515                if(target<targetLimit) {
516                    *target++=MINUS;
517                    if(offsets!=NULL) {
518                        *offsets++=sourceIndex;
519                        *offsets++=sourceIndex++;
520                    }
521                    /* realign length and targetCapacity */
522                    goto directMode;
523                } else {
524                    if(offsets!=NULL) {
525                        *offsets++=sourceIndex++;
526                    }
527                    cnv->charErrorBuffer[0]=MINUS;
528                    cnv->charErrorBufferLength=1;
529                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
530                    break;
531                }
532            } else {
533                /* un-read this character and switch to Unicode Mode */
534                --source;
535                *target++=PLUS;
536                if(offsets!=NULL) {
537                    *offsets++=sourceIndex;
538                }
539                inDirectMode=FALSE;
540                base64Counter=0;
541                goto unicodeMode;
542            }
543            --length;
544        }
545        if(source<sourceLimit && target>=targetLimit) {
546            /* target is full */
547            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
548        }
549    } else {
550unicodeMode:
551        while(source<sourceLimit) {
552            if(target<targetLimit) {
553                c=*source++;
554                if(c<=127 && encodeDirectly[c]) {
555                    /* encode directly */
556                    inDirectMode=TRUE;
557
558                    /* trick: back out this character to make this easier */
559                    --source;
560
561                    /* terminate the base64 sequence */
562                    if(base64Counter!=0) {
563                        /* write remaining bits for the previous character */
564                        *target++=toBase64[bits];
565                        if(offsets!=NULL) {
566                            *offsets++=sourceIndex-1;
567                        }
568                    }
569                    if(fromBase64[c]!=-1) {
570                        /* need to terminate with a minus */
571                        if(target<targetLimit) {
572                            *target++=MINUS;
573                            if(offsets!=NULL) {
574                                *offsets++=sourceIndex-1;
575                            }
576                        } else {
577                            cnv->charErrorBuffer[0]=MINUS;
578                            cnv->charErrorBufferLength=1;
579                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
580                            break;
581                        }
582                    }
583                    goto directMode;
584                } else {
585                    /*
586                     * base64 this character:
587                     * Output 2 or 3 base64 bytes for the remaining bits of the previous character
588                     * and the bits of this character, each implicitly in UTF-16BE.
589                     *
590                     * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
591                     * character to the next. The actual 2 or 4 bits are shifted to the left edge
592                     * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
593                     */
594                    switch(base64Counter) {
595                    case 0:
596                        *target++=toBase64[c>>10];
597                        if(target<targetLimit) {
598                            *target++=toBase64[(c>>4)&0x3f];
599                            if(offsets!=NULL) {
600                                *offsets++=sourceIndex;
601                                *offsets++=sourceIndex++;
602                            }
603                        } else {
604                            if(offsets!=NULL) {
605                                *offsets++=sourceIndex++;
606                            }
607                            cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f];
608                            cnv->charErrorBufferLength=1;
609                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
610                        }
611                        bits=(uint8_t)((c&15)<<2);
612                        base64Counter=1;
613                        break;
614                    case 1:
615                        *target++=toBase64[bits|(c>>14)];
616                        if(target<targetLimit) {
617                            *target++=toBase64[(c>>8)&0x3f];
618                            if(target<targetLimit) {
619                                *target++=toBase64[(c>>2)&0x3f];
620                                if(offsets!=NULL) {
621                                    *offsets++=sourceIndex;
622                                    *offsets++=sourceIndex;
623                                    *offsets++=sourceIndex++;
624                                }
625                            } else {
626                                if(offsets!=NULL) {
627                                    *offsets++=sourceIndex;
628                                    *offsets++=sourceIndex++;
629                                }
630                                cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f];
631                                cnv->charErrorBufferLength=1;
632                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
633                            }
634                        } else {
635                            if(offsets!=NULL) {
636                                *offsets++=sourceIndex++;
637                            }
638                            cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f];
639                            cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f];
640                            cnv->charErrorBufferLength=2;
641                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
642                        }
643                        bits=(uint8_t)((c&3)<<4);
644                        base64Counter=2;
645                        break;
646                    case 2:
647                        *target++=toBase64[bits|(c>>12)];
648                        if(target<targetLimit) {
649                            *target++=toBase64[(c>>6)&0x3f];
650                            if(target<targetLimit) {
651                                *target++=toBase64[c&0x3f];
652                                if(offsets!=NULL) {
653                                    *offsets++=sourceIndex;
654                                    *offsets++=sourceIndex;
655                                    *offsets++=sourceIndex++;
656                                }
657                            } else {
658                                if(offsets!=NULL) {
659                                    *offsets++=sourceIndex;
660                                    *offsets++=sourceIndex++;
661                                }
662                                cnv->charErrorBuffer[0]=toBase64[c&0x3f];
663                                cnv->charErrorBufferLength=1;
664                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
665                            }
666                        } else {
667                            if(offsets!=NULL) {
668                                *offsets++=sourceIndex++;
669                            }
670                            cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f];
671                            cnv->charErrorBuffer[1]=toBase64[c&0x3f];
672                            cnv->charErrorBufferLength=2;
673                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
674                        }
675                        bits=0;
676                        base64Counter=0;
677                        break;
678                    default:
679                        /* will never occur */
680                        break;
681                    }
682                }
683            } else {
684                /* target is full */
685                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
686                break;
687            }
688        }
689    }
690
691    if(pArgs->flush && source>=sourceLimit) {
692        /* flush remaining bits to the target */
693        if(!inDirectMode) {
694            if (base64Counter!=0) {
695                if(target<targetLimit) {
696                    *target++=toBase64[bits];
697                    if(offsets!=NULL) {
698                        *offsets++=sourceIndex-1;
699                    }
700                } else {
701                    cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
702                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
703                }
704            }
705            /* Add final MINUS to terminate unicodeMode */
706            if(target<targetLimit) {
707                *target++=MINUS;
708                if(offsets!=NULL) {
709                    *offsets++=sourceIndex-1;
710                }
711            } else {
712                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
713                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
714            }
715        }
716        /* reset the state for the next conversion */
717        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
718    } else {
719        /* set the converter state back into UConverter */
720        cnv->fromUnicodeStatus=
721            (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
722            ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
723    }
724
725    /* write back the updated pointers */
726    pArgs->source=source;
727    pArgs->target=(char *)target;
728    pArgs->offsets=offsets;
729    return;
730}
731
732static const char *
733_UTF7GetName(const UConverter *cnv) {
734    switch(cnv->fromUnicodeStatus>>28) {
735    case 1:
736        return "UTF-7,version=1";
737    default:
738        return "UTF-7";
739    }
740}
741
/*
 * Function table for the UTF-7 converter.
 * The initializer is positional: each entry fills the next member of
 * UConverterImpl, whose declaration is not part of this file.
 * NOTE(review): the meaning of each NULL slot has to be checked against the
 * UConverterImpl declaration (presumably in ucnv_cnv.h).
 * Each of the two conversion functions is entered twice; presumably once for
 * the plain slot and once for the with-offsets slot - confirm against the
 * struct declaration.
 */
static const UConverterImpl _UTF7Impl={
    UCNV_UTF7,

    NULL,
    NULL,

    _UTF7Open,
    NULL,
    _UTF7Reset,

    _UTF7ToUnicodeWithOffsets,
    _UTF7ToUnicodeWithOffsets,
    _UTF7FromUnicodeWithOffsets,
    _UTF7FromUnicodeWithOffsets,
    NULL,

    NULL,
    _UTF7GetName,
    NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
    NULL,
    ucnv_getCompleteUnicodeSet
};
764
/*
 * Static description of the UTF-7 converter (name, type and related constants).
 * The initializer is positional; UConverterStaticData is declared outside
 * this file.
 * NOTE(review): "1, 4" presumably are the minimum and maximum number of bytes
 * per character, and "{ 0x3f, ... }, 1" a one-byte substitution character
 * (0x3f is the US-ASCII question mark) - confirm against the struct declaration.
 */
static const UConverterStaticData _UTF7StaticData={
    sizeof(UConverterStaticData),
    "UTF-7",
    0, /* TODO CCSID for UTF-7 */
    UCNV_IBM, UCNV_UTF7,
    1, 4,
    { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
    FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
777
/*
 * Shared data object for the UTF-7 converter; the only non-static definition
 * in this section. It ties together the static data and the function table
 * defined above.
 * NOTE(review): the remaining positional members (the all-ones value, the
 * NULL pointers, FALSE and 0) are defined by UConverterSharedData, which is
 * declared outside this file - confirm their meaning there.
 */
const UConverterSharedData _UTF7Data={
    sizeof(UConverterSharedData), ~((uint32_t)0),
    NULL, NULL, &_UTF7StaticData, FALSE, &_UTF7Impl,
    0
};
783
784/* IMAP mailbox name encoding ----------------------------------------------- */
785
786/*
787 * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
788 * http://www.ietf.org/rfc/rfc2060.txt
789 *
790 * 5.1.3.  Mailbox International Naming Convention
791 *
792 * By convention, international mailbox names are specified using a
793 * modified version of the UTF-7 encoding described in [UTF-7].  The
794 * purpose of these modifications is to correct the following problems
795 * with UTF-7:
796 *
797 *    1) UTF-7 uses the "+" character for shifting; this conflicts with
798 *       the common use of "+" in mailbox names, in particular USENET
799 *       newsgroup names.
800 *
801 *    2) UTF-7's encoding is BASE64 which uses the "/" character; this
802 *       conflicts with the use of "/" as a popular hierarchy delimiter.
803 *
804 *    3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
805 *       the use of "\" as a popular hierarchy delimiter.
806 *
807 *    4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
808 *       the use of "~" in some servers as a home directory indicator.
809 *
810 *    5) UTF-7 permits multiple alternate forms to represent the same
811 *       string; in particular, printable US-ASCII chararacters can be
812 *       represented in encoded form.
813 *
814 * In modified UTF-7, printable US-ASCII characters except for "&"
815 * represent themselves; that is, characters with octet values 0x20-0x25
816 * and 0x27-0x7e.  The character "&" (0x26) is represented by the two-
817 * octet sequence "&-".
818 *
819 * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
820 * Unicode 16-bit octets) are represented in modified BASE64, with a
821 * further modification from [UTF-7] that "," is used instead of "/".
822 * Modified BASE64 MUST NOT be used to represent any printing US-ASCII
823 * character which can represent itself.
824 *
825 * "&" is used to shift to modified BASE64 and "-" to shift back to US-
826 * ASCII.  All names start in US-ASCII, and MUST end in US-ASCII (that
827 * is, a name that ends with a Unicode 16-bit octet MUST end with a "-
828 * ").
829 *
830 * For example, here is a mailbox name which mixes English, Japanese,
831 * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
832 */
833
834/*
835 * Tests for US-ASCII characters belonging to character classes
836 * defined in UTF-7.
837 *
838 * Set D (directly encoded characters) consists of the following
839 * characters: the upper and lower case letters A through Z
840 * and a through z, the 10 digits 0-9, and the following nine special
841 * characters (note that "+" and "=" are omitted):
842 *     '(),-./:?
843 *
844 * Set O (optional direct characters) consists of the following
845 * characters (note that "\" and "~" are omitted):
846 *     !"#$%&*;<=>@[]^_`{|}
847 *
848 * According to the rules in RFC 2152, the byte values for the following
849 * US-ASCII characters are not used in UTF-7 and are therefore illegal:
850 * - all C0 control codes except for CR LF TAB
851 * - BACKSLASH
852 * - TILDE
853 * - DEL
854 * - all codes beyond US-ASCII, i.e. all >127
855 */
856
/*
 * Character constants and classification macros for the IMAP mailbox name
 * encoding (modified UTF-7).
 *
 * All macros below may evaluate their argument more than once:
 * pass only side-effect-free expressions.
 */

/* uses '&' not '+' to start a base64 sequence */
#define AMPERSAND 0x26
#define COMMA 0x2c
#define SLASH 0x2f

/* legal byte values: all US-ASCII graphic characters 0x20..0x7e */
#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)

/*
 * direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26
 * (the argument is parenthesized so that an expression argument such as a|b
 * is compared against AMPERSAND as a whole)
 */
#define inSetDIMAP(c) (isLegalIMAP(c) && (c)!=AMPERSAND)

/* 6-bit value to modified-base64 character: value 63 maps to ',' instead of the table entry */
#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA)
/* modified-base64 character to 6-bit value: ',' is 63, '/' is rejected (-1), all else per the table */
#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[c])
870
871/*
872 * converter status values:
873 *
874 * toUnicodeStatus:
875 *     24 inDirectMode (boolean)
876 * 23..16 base64Counter (-1..7)
877 * 15..0  bits (up to 14 bits incoming base64)
878 *
879 * fromUnicodeStatus:
880 *     24 inDirectMode (boolean)
881 * 23..16 base64Counter (0..2)
882 *  7..0  bits (6 bits outgoing base64)
883 *
884 * ignore bits 31..25
885 */
886
/*
 * Converts IMAP mailbox name bytes (modified UTF-7) to UTF-16 code units.
 *
 * Input is read from pArgs->source up to pArgs->sourceLimit, output is
 * written to pArgs->target up to pArgs->targetLimit. If pArgs->offsets is
 * not NULL, one source offset is written for every output UChar.
 * On return the advanced source/target/offsets pointers are stored back
 * into pArgs, and the state machine state is stored back into the converter
 * (toUnicodeStatus, toULength; the bytes of the current sequence are kept
 * in toUBytes).
 *
 * The function works in two modes and jumps between them with goto:
 * - direct mode: bytes 0x20..0x7e stand for themselves, '&' starts Unicode mode
 * - Unicode mode: base64 digits are collected into 16-bit code units until
 *   a '-' ends the sequence; "&-" yields a single '&'
 *
 * *pErrorCode is set to
 * - U_ILLEGAL_CHAR_FOUND for illegal bytes, for base64-encoded code units
 *   in 0x20..0x7e, and for badly terminated base64 sequences
 * - U_BUFFER_OVERFLOW_ERROR when the target is full while input remains
 * - U_TRUNCATED_CHAR_FOUND when the input ends (pArgs->flush) in Unicode mode
 */
887static void
888_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
889                          UErrorCode *pErrorCode) {
890    UConverter *cnv;
891    const uint8_t *source, *sourceLimit;
892    UChar *target;
893    const UChar *targetLimit;
894    int32_t *offsets;
895
896    uint8_t *bytes;
897    uint8_t byteIndex;
898
899    int32_t length, targetCapacity;
900
901    /* UTF-7 state */
902    uint16_t bits;
903    int8_t base64Counter;
904    UBool inDirectMode;
905
906    int8_t base64Value;
907
908    int32_t sourceIndex, nextSourceIndex;
909
910    UChar c;
911    uint8_t b;
912
913    /* set up the local pointers */
914    cnv=pArgs->converter;
915
916    source=(const uint8_t *)pArgs->source;
917    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
918    target=pArgs->target;
919    targetLimit=pArgs->targetLimit;
920    offsets=pArgs->offsets;
921    /* get the state machine state */
922    {
923        uint32_t status=cnv->toUnicodeStatus;
924        inDirectMode=(UBool)((status>>24)&1);
925        base64Counter=(int8_t)(status>>16);
926        bits=(uint16_t)status;
927    }
928    bytes=cnv->toUBytes;
929    byteIndex=cnv->toULength;
930
931    /* sourceIndex=-1 if the current character began in the previous buffer */
932    sourceIndex=byteIndex==0 ? 0 : -1;
933    nextSourceIndex=0;
934
935    if(inDirectMode) {
936directMode:
937        /*
938         * In Direct Mode, US-ASCII characters are encoded directly, i.e.,
939         * with their US-ASCII byte values.
940         * An ampersand starts Unicode (or "escape") Mode.
941         *
942         * In Direct Mode, only the sourceIndex is used.
943         */
944        byteIndex=0;
        /*
         * Every direct byte produces exactly one UChar, so the loop count is
         * the smaller of the remaining input length and the remaining
         * output capacity; no per-character capacity check is needed.
         */
945        length=(int32_t)(sourceLimit-source);
946        targetCapacity=(int32_t)(targetLimit-target);
947        if(length>targetCapacity) {
948            length=targetCapacity;
949        }
950        while(length>0) {
951            b=*source++;
952            if(!isLegalIMAP(b)) {
953                /* illegal */
954                bytes[0]=b;
955                byteIndex=1;
956                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
957                break;
958            } else if(b!=AMPERSAND) {
959                /* write directly encoded character */
960                *target++=b;
961                if(offsets!=NULL) {
962                    *offsets++=sourceIndex++;
963                }
964            } else /* AMPERSAND */ {
965                /* switch to Unicode mode */
966                nextSourceIndex=++sourceIndex;
967                inDirectMode=FALSE;
968                byteIndex=0;
969                bits=0;
970                base64Counter=-1;
971                goto unicodeMode;
972            }
973            --length;
974        }
975        if(source<sourceLimit && target>=targetLimit) {
976            /* target is full */
977            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
978        }
979    } else {
980unicodeMode:
981        /*
982         * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
983         * The base64 sequence ends with any character that is not in the base64 alphabet.
984         * A terminating minus sign is consumed.
985         * US-ASCII must not be base64-ed.
986         *
987         * In Unicode Mode, the sourceIndex has the index to the start of the current
988         * base64 bytes, while nextSourceIndex is precisely parallel to source,
989         * keeping the index to the following byte.
990         * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
991         */
992        while(source<sourceLimit) {
993            if(target<targetLimit) {
994                bytes[byteIndex++]=b=*source++;
995                ++nextSourceIndex;
996                if(b>0x7e) {
997                    /* illegal - test other illegal US-ASCII values by base64Value==-3 */
998                    inDirectMode=TRUE;
999                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1000                    break;
1001                } else if((base64Value=FROM_BASE64_IMAP(b))>=0) {
1002                    /* collect base64 bytes into UChars */
                    /*
                     * base64Counter counts the base64 digits of the current
                     * group of 8 digits (= 48 bits = 3 UChars); a UChar is
                     * completed at counter values 2, 5 and 7.
                     */
1003                    switch(base64Counter) {
1004                    case -1: /* -1 is immediately after the & */
1005                    case 0:
1006                        bits=base64Value;
1007                        base64Counter=1;
1008                        break;
1009                    case 1:
1010                    case 3:
1011                    case 4:
1012                    case 6:
1013                        bits=(uint16_t)((bits<<6)|base64Value);
1014                        ++base64Counter;
1015                        break;
1016                    case 2:
                        /* 12 collected bits + top 4 bits of this digit; 2 bits are carried over */
1017                        c=(UChar)((bits<<4)|(base64Value>>2));
                        /* a code unit in 0x20..0x7e must not be base64-encoded */
1018                        if(isLegalIMAP(c)) {
1019                            /* illegal */
1020                            inDirectMode=TRUE;
1021                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1022                            goto endloop;
1023                        }
1024                        *target++=c;
1025                        if(offsets!=NULL) {
1026                            *offsets++=sourceIndex;
1027                            sourceIndex=nextSourceIndex-1;
1028                        }
1029                        bytes[0]=b; /* keep this byte in case an error occurs */
1030                        byteIndex=1;
1031                        bits=(uint16_t)(base64Value&3);
1032                        base64Counter=3;
1033                        break;
1034                    case 5:
                        /* 14 collected bits + top 2 bits of this digit; 4 bits are carried over */
1035                        c=(UChar)((bits<<2)|(base64Value>>4));
                        /* a code unit in 0x20..0x7e must not be base64-encoded */
1036                        if(isLegalIMAP(c)) {
1037                            /* illegal */
1038                            inDirectMode=TRUE;
1039                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1040                            goto endloop;
1041                        }
1042                        *target++=c;
1043                        if(offsets!=NULL) {
1044                            *offsets++=sourceIndex;
1045                            sourceIndex=nextSourceIndex-1;
1046                        }
1047                        bytes[0]=b; /* keep this byte in case an error occurs */
1048                        byteIndex=1;
1049                        bits=(uint16_t)(base64Value&15);
1050                        base64Counter=6;
1051                        break;
1052                    case 7:
                        /* 10 collected bits + all 6 bits of this digit; nothing is carried over */
1053                        c=(UChar)((bits<<6)|base64Value);
                        /* a code unit in 0x20..0x7e must not be base64-encoded */
1054                        if(isLegalIMAP(c)) {
1055                            /* illegal */
1056                            inDirectMode=TRUE;
1057                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1058                            goto endloop;
1059                        }
1060                        *target++=c;
1061                        if(offsets!=NULL) {
1062                            *offsets++=sourceIndex;
1063                            sourceIndex=nextSourceIndex;
1064                        }
1065                        byteIndex=0;
1066                        bits=0;
1067                        base64Counter=0;
1068                        break;
1069                    default:
1070                        /* will never occur */
1071                        break;
1072                    }
1073                } else if(base64Value==-2) {
1074                    /* minus sign terminates the base64 sequence */
1075                    inDirectMode=TRUE;
1076                    if(base64Counter==-1) {
1077                        /* &- i.e. a minus immediately following an ampersand */
1078                        *target++=AMPERSAND;
1079                        if(offsets!=NULL) {
1080                            *offsets++=sourceIndex-1;
1081                        }
1082                    } else {
1083                        /* absorb the minus and leave the Unicode Mode */
1084                        if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
1085                            /* bits are illegally left over, a UChar is incomplete */
1086                            /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */
1087                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1088                            break;
1089                        }
1090                    }
1091                    sourceIndex=nextSourceIndex;
1092                    goto directMode;
1093                } else {
1094                    if(base64Counter==-1) {
1095                        /* illegal: & immediately followed by something other than base64 or minus sign */
1096                        /* include the ampersand in the reported sequence */
1097                        --sourceIndex;
1098                        bytes[0]=AMPERSAND;
1099                        bytes[1]=b;
1100                        byteIndex=2;
1101                    }
1102                    /* base64Value==-1 for characters that are illegal only in Unicode mode */
1103                    /* base64Value==-3 for illegal characters */
1104                    /* illegal */
1105                    inDirectMode=TRUE;
1106                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1107                    break;
1108                }
1109            } else {
1110                /* target is full */
1111                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1112                break;
1113            }
1114        }
1115    }
1116endloop:
1117
1118    /*
1119     * the end of the input stream and detection of truncated input
1120     * are handled by the framework, but here we must check if we are in Unicode
1121     * mode and byteIndex==0 because we must end in direct mode
1122     *
1123     * conditions:
1124     *   successful
1125     *   in Unicode mode and byteIndex==0
1126     *   end of input and no truncated input
1127     */
1128    if( U_SUCCESS(*pErrorCode) &&
1129        !inDirectMode && byteIndex==0 &&
1130        pArgs->flush && source>=sourceLimit
1131    ) {
1132        if(base64Counter==-1) {
1133            /* & at the very end of the input */
1134            /* make the ampersand the reported sequence */
1135            bytes[0]=AMPERSAND;
1136            byteIndex=1;
1137        }
1138        /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */
1139
1140        inDirectMode=TRUE; /* avoid looping */
1141        *pErrorCode=U_TRUNCATED_CHAR_FOUND;
1142    }
1143
1144    /* set the converter state back into UConverter */
    /* layout: bit 24 inDirectMode, bits 23..16 base64Counter (as an unsigned byte), bits 15..0 bits */
1145    cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
1146    cnv->toULength=byteIndex;
1147
1148    /* write back the updated pointers */
1149    pArgs->source=(const char *)source;
1150    pArgs->target=target;
1151    pArgs->offsets=offsets;
1152    return;
1153}
1154
/*
 * Converts UTF-16 code units to IMAP mailbox name bytes (modified UTF-7).
 *
 * Input is read from pArgs->source up to pArgs->sourceLimit, output is
 * written to pArgs->target up to pArgs->targetLimit. If pArgs->offsets is
 * not NULL, one source offset is written for every output byte that is
 * stored in the target.
 * On return the advanced source/target/offsets pointers are stored back
 * into pArgs and the state is stored back into cnv->fromUnicodeStatus
 * (bits 31..28 of the status word are always preserved).
 *
 * The function works in two modes and jumps between them with goto:
 * - direct mode: 0x20..0x7e except '&' are written as is, '&' is written
 *   as "&-", anything else writes '&' and switches to Unicode mode
 * - Unicode mode: code units are base64-encoded; a code unit in 0x20..0x7e
 *   ends the sequence with the pending bits and a '-'
 *
 * When the target becomes full in the middle of a multi-byte output, the
 * bytes that do not fit are stored in cnv->charErrorBuffer and *pErrorCode
 * is set to U_BUFFER_OVERFLOW_ERROR.
 * When pArgs->flush is set and all input has been consumed, an open base64
 * sequence is terminated and the state is reset to direct mode.
 */
1155static void
1156_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
1157                            UErrorCode *pErrorCode) {
1158    UConverter *cnv;
1159    const UChar *source, *sourceLimit;
1160    uint8_t *target, *targetLimit;
1161    int32_t *offsets;
1162
1163    int32_t length, targetCapacity, sourceIndex;
1164    UChar c;
1165    uint8_t b;
1166
1167    /* UTF-7 state */
1168    uint8_t bits;
1169    int8_t base64Counter;
1170    UBool inDirectMode;
1171
1172    /* set up the local pointers */
1173    cnv=pArgs->converter;
1174
1175    /* set up the local pointers */
1176    source=pArgs->source;
1177    sourceLimit=pArgs->sourceLimit;
1178    target=(uint8_t *)pArgs->target;
1179    targetLimit=(uint8_t *)pArgs->targetLimit;
1180    offsets=pArgs->offsets;
1181
1182    /* get the state machine state */
1183    {
1184        uint32_t status=cnv->fromUnicodeStatus;
1185        inDirectMode=(UBool)((status>>24)&1);
1186        base64Counter=(int8_t)(status>>16);
1187        bits=(uint8_t)status;
1188    }
1189
1190    /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
1191    sourceIndex=0;
1192
1193    if(inDirectMode) {
1194directMode:
        /*
         * The loop count is the smaller of the remaining input length and
         * the remaining output capacity. Branches that write more than one
         * byte per input character do not continue the loop; they re-enter
         * at directMode (recomputing the count) or leave it.
         */
1195        length=(int32_t)(sourceLimit-source);
1196        targetCapacity=(int32_t)(targetLimit-target);
1197        if(length>targetCapacity) {
1198            length=targetCapacity;
1199        }
1200        while(length>0) {
1201            c=*source++;
1202            /* encode 0x20..0x7e except '&' directly */
1203            if(inSetDIMAP(c)) {
1204                /* encode directly */
1205                *target++=(uint8_t)c;
1206                if(offsets!=NULL) {
1207                    *offsets++=sourceIndex++;
1208                }
1209            } else if(c==AMPERSAND) {
1210                /* output &- for & */
1211                *target++=AMPERSAND;
1212                if(target<targetLimit) {
1213                    *target++=MINUS;
1214                    if(offsets!=NULL) {
1215                        *offsets++=sourceIndex;
1216                        *offsets++=sourceIndex++;
1217                    }
1218                    /* realign length and targetCapacity */
1219                    goto directMode;
1220                } else {
1221                    if(offsets!=NULL) {
1222                        *offsets++=sourceIndex++;
1223                    }
                    /* no room for the minus: hand it to the overflow buffer */
1224                    cnv->charErrorBuffer[0]=MINUS;
1225                    cnv->charErrorBufferLength=1;
1226                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1227                    break;
1228                }
1229            } else {
1230                /* un-read this character and switch to Unicode Mode */
1231                --source;
1232                *target++=AMPERSAND;
1233                if(offsets!=NULL) {
1234                    *offsets++=sourceIndex;
1235                }
1236                inDirectMode=FALSE;
1237                base64Counter=0;
1238                goto unicodeMode;
1239            }
1240            --length;
1241        }
1242        if(source<sourceLimit && target>=targetLimit) {
1243            /* target is full */
1244            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1245        }
1246    } else {
1247unicodeMode:
1248        while(source<sourceLimit) {
1249            if(target<targetLimit) {
1250                c=*source++;
1251                if(isLegalIMAP(c)) {
1252                    /* encode directly */
1253                    inDirectMode=TRUE;
1254
1255                    /* trick: back out this character to make this easier */
1256                    --source;
1257
1258                    /* terminate the base64 sequence */
1259                    if(base64Counter!=0) {
1260                        /* write remaining bits for the previous character */
1261                        *target++=TO_BASE64_IMAP(bits);
1262                        if(offsets!=NULL) {
1263                            *offsets++=sourceIndex-1;
1264                        }
1265                    }
1266                    /* need to terminate with a minus */
1267                    if(target<targetLimit) {
1268                        *target++=MINUS;
1269                        if(offsets!=NULL) {
1270                            *offsets++=sourceIndex-1;
1271                        }
1272                    } else {
1273                        cnv->charErrorBuffer[0]=MINUS;
1274                        cnv->charErrorBufferLength=1;
1275                        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1276                        break;
1277                    }
1278                    goto directMode;
1279                } else {
1280                    /*
1281                     * base64 this character:
1282                     * Output 2 or 3 base64 bytes for the remaining bits of the previous character
1283                     * and the bits of this character, each implicitly in UTF-16BE.
1284                     *
1285                     * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
1286                     * character to the next. The actual 2 or 4 bits are shifted to the left edge
1287                     * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
1288                     */
1289                    switch(base64Counter) {
1290                    case 0:
                        /* no pending bits: 16 bits -> 2 base64 bytes, 4 bits remain pending */
1291                        b=(uint8_t)(c>>10);
1292                        *target++=TO_BASE64_IMAP(b);
1293                        if(target<targetLimit) {
1294                            b=(uint8_t)((c>>4)&0x3f);
1295                            *target++=TO_BASE64_IMAP(b);
1296                            if(offsets!=NULL) {
1297                                *offsets++=sourceIndex;
1298                                *offsets++=sourceIndex++;
1299                            }
1300                        } else {
1301                            if(offsets!=NULL) {
1302                                *offsets++=sourceIndex++;
1303                            }
1304                            b=(uint8_t)((c>>4)&0x3f);
1305                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
1306                            cnv->charErrorBufferLength=1;
1307                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1308                        }
1309                        bits=(uint8_t)((c&15)<<2);
1310                        base64Counter=1;
1311                        break;
1312                    case 1:
                        /* 4 pending bits + 16 bits -> 3 base64 bytes, 2 bits remain pending */
1313                        b=(uint8_t)(bits|(c>>14));
1314                        *target++=TO_BASE64_IMAP(b);
1315                        if(target<targetLimit) {
1316                            b=(uint8_t)((c>>8)&0x3f);
1317                            *target++=TO_BASE64_IMAP(b);
1318                            if(target<targetLimit) {
1319                                b=(uint8_t)((c>>2)&0x3f);
1320                                *target++=TO_BASE64_IMAP(b);
1321                                if(offsets!=NULL) {
1322                                    *offsets++=sourceIndex;
1323                                    *offsets++=sourceIndex;
1324                                    *offsets++=sourceIndex++;
1325                                }
1326                            } else {
1327                                if(offsets!=NULL) {
1328                                    *offsets++=sourceIndex;
1329                                    *offsets++=sourceIndex++;
1330                                }
1331                                b=(uint8_t)((c>>2)&0x3f);
1332                                cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
1333                                cnv->charErrorBufferLength=1;
1334                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1335                            }
1336                        } else {
1337                            if(offsets!=NULL) {
1338                                *offsets++=sourceIndex++;
1339                            }
1340                            b=(uint8_t)((c>>8)&0x3f);
1341                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
1342                            b=(uint8_t)((c>>2)&0x3f);
1343                            cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
1344                            cnv->charErrorBufferLength=2;
1345                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1346                        }
1347                        bits=(uint8_t)((c&3)<<4);
1348                        base64Counter=2;
1349                        break;
1350                    case 2:
                        /* 2 pending bits + 16 bits -> 3 base64 bytes, nothing remains pending */
1351                        b=(uint8_t)(bits|(c>>12));
1352                        *target++=TO_BASE64_IMAP(b);
1353                        if(target<targetLimit) {
1354                            b=(uint8_t)((c>>6)&0x3f);
1355                            *target++=TO_BASE64_IMAP(b);
1356                            if(target<targetLimit) {
1357                                b=(uint8_t)(c&0x3f);
1358                                *target++=TO_BASE64_IMAP(b);
1359                                if(offsets!=NULL) {
1360                                    *offsets++=sourceIndex;
1361                                    *offsets++=sourceIndex;
1362                                    *offsets++=sourceIndex++;
1363                                }
1364                            } else {
1365                                if(offsets!=NULL) {
1366                                    *offsets++=sourceIndex;
1367                                    *offsets++=sourceIndex++;
1368                                }
1369                                b=(uint8_t)(c&0x3f);
1370                                cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
1371                                cnv->charErrorBufferLength=1;
1372                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1373                            }
1374                        } else {
1375                            if(offsets!=NULL) {
1376                                *offsets++=sourceIndex++;
1377                            }
1378                            b=(uint8_t)((c>>6)&0x3f);
1379                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
1380                            b=(uint8_t)(c&0x3f);
1381                            cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
1382                            cnv->charErrorBufferLength=2;
1383                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1384                        }
1385                        bits=0;
1386                        base64Counter=0;
1387                        break;
1388                    default:
1389                        /* will never occur */
1390                        break;
1391                    }
1392                }
1393            } else {
1394                /* target is full */
1395                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1396                break;
1397            }
1398        }
1399    }
1400
1401    if(pArgs->flush && source>=sourceLimit) {
1402        /* flush remaining bits to the target */
1403        if(!inDirectMode) {
1404            if(base64Counter!=0) {
1405                if(target<targetLimit) {
1406                    *target++=TO_BASE64_IMAP(bits);
1407                    if(offsets!=NULL) {
1408                        *offsets++=sourceIndex-1;
1409                    }
1410                } else {
                    /* append: the overflow buffer may already hold bytes stored by the loop above */
1411                    cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits);
1412                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1413                }
1414            }
1415            /* need to terminate with a minus */
1416            if(target<targetLimit) {
1417                *target++=MINUS;
1418                if(offsets!=NULL) {
1419                    *offsets++=sourceIndex-1;
1420                }
1421            } else {
1422                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
1423                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1424            }
1425        }
1426        /* reset the state for the next conversion */
1427        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
1428    } else {
1429        /* set the converter state back into UConverter */
1430        cnv->fromUnicodeStatus=
1431            (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
1432            ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
1433    }
1434
1435    /* write back the updated pointers */
1436    pArgs->source=source;
1437    pArgs->target=(char *)target;
1438    pArgs->offsets=offsets;
1439    return;
1440}
1441
/*
 * Function table for the IMAP mailbox name converter.
 *
 * The initializer is positional: the meaning of each slot is fixed by the
 * UConverterImpl declaration, which is not visible in this file
 * (NOTE(review): confirm the slot order there before changing entries).
 *
 * Open and reset reuse the UTF-7 functions (_UTF7Open, _UTF7Reset); only
 * the conversion functions are IMAP-specific. Unlike _UTF7Impl, the slot
 * that holds _UTF7GetName there is NULL here.
 * The to-Unicode and from-Unicode functions are each installed in two
 * consecutive slots; presumably the "plain" and "with offsets" entry
 * points - confirm.
 */
1442static const UConverterImpl _IMAPImpl={
1443    UCNV_IMAP_MAILBOX,
1444
1445    NULL,
1446    NULL,
1447
1448    _UTF7Open,
1449    NULL,
1450    _UTF7Reset,
1451
1452    _IMAPToUnicodeWithOffsets,
1453    _IMAPToUnicodeWithOffsets,
1454    _IMAPFromUnicodeWithOffsets,
1455    _IMAPFromUnicodeWithOffsets,
1456    NULL,
1457
1458    NULL,
1459    NULL,
1460    NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
1461    NULL,
1462    ucnv_getCompleteUnicodeSet
1463};
1464
/*
 * Static descriptive data for the IMAP mailbox name converter.
 *
 * Positional initializer; field meanings follow the UConverterStaticData
 * declaration, which is not visible in this file. Apart from the name and
 * the converter type constant, the values are the same as in
 * _UTF7StaticData.
 * NOTE(review): "1, 4" presumably are the minimum and maximum number of
 * bytes per character - confirm against the declaration.
 */
1465static const UConverterStaticData _IMAPStaticData={
1466    sizeof(UConverterStaticData),
1467    "IMAP-mailbox-name",
1468    0, /* TODO CCSID for IMAP-mailbox-name */
1469    UCNV_IBM, UCNV_IMAP_MAILBOX,
1470    1, 4,
1471    { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
1472    FALSE, FALSE,
1473    0,
1474    0,
1475    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
1476};
1477
/*
 * Shared data object for the IMAP mailbox name converter: ties together the
 * static description (_IMAPStaticData) and the function table (_IMAPImpl).
 * This object is not static, so it is visible to other translation units.
 * NOTE(review): the meaning of the remaining positional fields (~0, NULL,
 * FALSE, 0) is defined by UConverterSharedData, not visible here - confirm.
 */
1478const UConverterSharedData _IMAPData={
1479    sizeof(UConverterSharedData), ~((uint32_t)0),
1480    NULL, NULL, &_IMAPStaticData, FALSE, &_IMAPImpl,
1481    0
1482};
1483
1484#endif
1485