1/*
2**********************************************************************
3*   Copyright (C) 2002-2009, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   file name:  ucnv_u7.c
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2002jul01
12*   created by: Markus W. Scherer
13*
14*   UTF-7 converter implementation. Used to be in ucnv_utf.c.
15*/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_CONVERSION
20
21#include "unicode/ucnv.h"
22#include "ucnv_bld.h"
23#include "ucnv_cnv.h"
24
25/* UTF-7 -------------------------------------------------------------------- */
26
27/*
28 * UTF-7 is a stateful encoding of Unicode.
29 * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt)
30 * It was intended for use in Internet email systems, using in its bytewise
31 * encoding only a subset of 7-bit US-ASCII.
32 * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still
33 * occasionally used.
34 *
 * For converting Unicode to UTF-7, the RFC allows some US-ASCII characters
 * to be encoded either directly or in base64. In particular, the characters
 * in set O as defined in the RFC (see below) may be encoded directly but are
 * not allowed in, e.g., email headers.
39 * By default, the ICU UTF-7 converter encodes set O directly.
40 * By choosing the option "version=1", set O will be escaped instead.
41 * For example:
42 *     utf7Converter=ucnv_open("UTF-7,version=1");
43 *
44 * For details about email headers see RFC 2047.
45 */
46
47/*
48 * Tests for US-ASCII characters belonging to character classes
49 * defined in UTF-7.
50 *
51 * Set D (directly encoded characters) consists of the following
52 * characters: the upper and lower case letters A through Z
53 * and a through z, the 10 digits 0-9, and the following nine special
54 * characters (note that "+" and "=" are omitted):
55 *     '(),-./:?
56 *
57 * Set O (optional direct characters) consists of the following
58 * characters (note that "\" and "~" are omitted):
59 *     !"#$%&*;<=>@[]^_`{|}
60 *
61 * According to the rules in RFC 2152, the byte values for the following
62 * US-ASCII characters are not used in UTF-7 and are therefore illegal:
63 * - all C0 control codes except for CR LF TAB
64 * - BACKSLASH
65 * - TILDE
66 * - DEL
67 * - all codes beyond US-ASCII, i.e. all >127
68 */
/*
 * Byte-range helper: true if (c) lies in [first, first+count).
 * The difference is truncated to 8 bits, so one unsigned compare covers
 * both ends of the range.
 */
#define inByteRange(c, first, count) ((uint8_t)((c)-(first))<(count))

/* RFC 2152 set D: letters, digits, and the nine specials '(),-./:? */
#define inSetD(c) \
    (inByteRange(c, 0x61, 26) ||    /* a-z */ \
     inByteRange(c, 0x41, 26) ||    /* A-Z */ \
     inByteRange(c, 0x30, 10) ||    /* 0-9 */ \
     inByteRange(c, 0x27, 3) ||     /* '() */ \
     inByteRange(c, 0x2c, 4) ||     /* ,-./ */ \
     (c)==0x3a ||                   /* : */ \
     (c)==0x3f                      /* ? */ \
    )

/* RFC 2152 set O: !"#$%&*;<=>@[]^_`{|} */
#define inSetO(c) \
    (inByteRange(c, 0x21, 6) ||     /* !"#$%& */ \
     inByteRange(c, 0x3b, 4) ||     /* ;<=> */ \
     inByteRange(c, 0x5d, 4) ||     /* ]^_` */ \
     inByteRange(c, 0x7b, 3) ||     /* {|} */ \
     (c)==0x2a ||                   /* * */ \
     (c)==0x40 ||                   /* @ */ \
     (c)==0x5b                      /* [ */ \
    )

/* the whitespace controls that UTF-7 permits: CR, LF, TAB (and, for the second form, SPACE) */
#define isCRLFTAB(c) ((c)==0x0d || (c)==0x0a || (c)==0x09)
#define isCRLFSPTAB(c) ((c)==0x20 || isCRLFTAB(c))

/* US-ASCII code points with a special role in UTF-7 */
#define PLUS  0x2b
#define MINUS 0x2d
#define BACKSLASH 0x5c
#define TILDE 0x7e

/*
 * Legal UTF-7 byte values: SPACE (0x20) up to but not including TILDE (0x7e),
 * minus BACKSLASH, plus CR LF TAB.
 */
#define isLegalUTF7(c) ((inByteRange(c, 0x20, 94) && (c)!=BACKSLASH) || isCRLFTAB(c))
95
/*
 * encode directly sets D and O and CR LF SP TAB
 *
 * Indexed by US-ASCII code point (0..127); a non-zero entry means the
 * fromUnicode code writes that character as-is instead of base64-encoding it.
 * Each source row covers 16 code points. The zero entries within 0x20..0x7f
 * are '+' (0x2b), '\' (0x5c), '~' (0x7e) and DEL (0x7f); among the C0
 * controls only TAB (0x09), LF (0x0a) and CR (0x0d) are set.
 * Selected when the version bits of fromUnicodeStatus are 0.
 */
static const UBool encodeDirectlyMaximum[128]={
 /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,

    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0
};
111
/*
 * encode directly set D and CR LF SP TAB but not set O
 *
 * Same layout as encodeDirectlyMaximum (index = US-ASCII code point, 16 per
 * row), but every set O character (!"#$%&*;<=>@[]^_`{|}) is 0 here as well,
 * so those characters get base64-encoded.
 * Selected when the version bits of fromUnicodeStatus are non-zero
 * (converter opened as "UTF-7,version=1").
 */
static const UBool encodeDirectlyRestricted[128]={
 /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,

    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,

    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
};
127
/*
 * Maps a 6-bit value (0..63) to the US-ASCII code point of its base64 digit:
 * 0..25 -> A-Z, 26..51 -> a-z, 52..61 -> 0-9, 62 -> '+', 63 -> '/'.
 * Values are spelled as numbers, not character literals, so the table holds
 * US-ASCII codes regardless of the compiler's execution character set.
 */
static const uint8_t
toBase64[64]={
    /*  0.. 7  A-H */ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    /*  8..15  I-P */ 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
    /* 16..23  Q-X */ 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    /* 24..31  YZ a-f */ 0x59, 0x5a, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
    /* 32..39  g-n */ 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e,
    /* 40..47  o-v */ 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
    /* 48..55  w-z 0-3 */ 0x77, 0x78, 0x79, 0x7a, 0x30, 0x31, 0x32, 0x33,
    /* 56..63  4-9 + / */ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2b, 0x2f
};
141
/*
 * Classifies a US-ASCII byte (0..127) for the base64 decoder:
 *   0..63  the 6-bit value of a base64 digit (A-Z a-z 0-9 + /)
 *   -1     legal in UTF-7 but not a base64 digit (ends a base64 run)
 *   -2     MINUS, the explicit base64 terminator
 *   -3     illegal in UTF-7 (most C0 controls, '\', '~', DEL)
 */
static const int8_t
fromBase64[128]={
    /* 0x00..0x07 */ -3, -3, -3, -3, -3, -3, -3, -3,
    /* 0x08..0x0f: TAB, LF and CR are legal */
                     -3, -1, -1, -3, -3, -1, -3, -3,
    /* 0x10..0x17 */ -3, -3, -3, -3, -3, -3, -3, -3,
    /* 0x18..0x1f */ -3, -3, -3, -3, -3, -3, -3, -3,

    /* 0x20..0x27  SP ! " # $ % & ' */
                     -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x28..0x2f  ( ) * + , - . /  */
                     -1, -1, -1, 62, -1, -2, -1, 63,
    /* 0x30..0x37  0-7 */
                     52, 53, 54, 55, 56, 57, 58, 59,
    /* 0x38..0x3f  8 9 : ; < = > ? */
                     60, 61, -1, -1, -1, -1, -1, -1,

    /* 0x40..0x47  @ A-G */
                     -1,  0,  1,  2,  3,  4,  5,  6,
    /* 0x48..0x4f  H-O */
                      7,  8,  9, 10, 11, 12, 13, 14,
    /* 0x50..0x57  P-W */
                     15, 16, 17, 18, 19, 20, 21, 22,
    /* 0x58..0x5f  X Y Z [ \ ] ^ _ */
                     23, 24, 25, -1, -3, -1, -1, -1,

    /* 0x60..0x67  ` a-g */
                     -1, 26, 27, 28, 29, 30, 31, 32,
    /* 0x68..0x6f  h-o */
                     33, 34, 35, 36, 37, 38, 39, 40,
    /* 0x70..0x77  p-w */
                     41, 42, 43, 44, 45, 46, 47, 48,
    /* 0x78..0x7f  x y z { | } ~ DEL */
                     49, 50, 51, -1, -1, -1, -3, -3
};
161
162/*
163 * converter status values:
164 *
165 * toUnicodeStatus:
166 *     24 inDirectMode (boolean)
167 * 23..16 base64Counter (-1..7)
168 * 15..0  bits (up to 14 bits incoming base64)
169 *
170 * fromUnicodeStatus:
171 * 31..28 version (0: set O direct  1: set O escaped)
172 *     24 inDirectMode (boolean)
173 * 23..16 base64Counter (0..2)
174 *  7..0  bits (6 bits outgoing base64)
175 *
176 */
177
178static void
179_UTF7Reset(UConverter *cnv, UConverterResetChoice choice) {
180    if(choice<=UCNV_RESET_TO_UNICODE) {
181        /* reset toUnicode */
182        cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
183        cnv->toULength=0;
184    }
185    if(choice!=UCNV_RESET_TO_UNICODE) {
186        /* reset fromUnicode */
187        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
188    }
189}
190
191static void
192_UTF7Open(UConverter *cnv,
193          UConverterLoadArgs *pArgs,
194          UErrorCode *pErrorCode) {
195    if(UCNV_GET_VERSION(cnv)<=1) {
196        /* TODO(markus): Should just use cnv->options rather than copying the version number. */
197        cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28;
198        _UTF7Reset(cnv, UCNV_RESET_BOTH);
199    } else {
200        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
201    }
202}
203
/*
 * Convert UTF-7 bytes to UTF-16 code units, optionally recording for each
 * output unit the index of the source byte it was decoded from.
 *
 * pArgs      supplies the converter, source/sourceLimit, target/targetLimit,
 *            offsets (may be NULL) and the flush flag. On return, source,
 *            target and offsets have been advanced past what was consumed
 *            and produced.
 * pErrorCode set to U_ILLEGAL_CHAR_FOUND when an illegal byte or an
 *            incomplete base64 sequence is met, or to U_BUFFER_OVERFLOW_ERROR
 *            when the target fills up while input remains; not modified
 *            otherwise.
 *
 * The decoder alternates between two states joined by goto:
 *   directMode   bytes are copied through until '+' is seen
 *   unicodeMode  base64 digits are accumulated into 16-bit code units
 * State that must survive between calls is packed into cnv->toUnicodeStatus
 * (inDirectMode, base64Counter, bits) and cnv->toUBytes/toULength (the bytes
 * of the sequence currently being decoded, kept for error reporting).
 */
static void
_UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                          UErrorCode *pErrorCode) {
    UConverter *cnv;
    const uint8_t *source, *sourceLimit;
    UChar *target;
    const UChar *targetLimit;
    int32_t *offsets;

    uint8_t *bytes;
    uint8_t byteIndex;

    int32_t length, targetCapacity;

    /* UTF-7 state */
    uint16_t bits;
    int8_t base64Counter;
    UBool inDirectMode;

    int8_t base64Value;

    int32_t sourceIndex, nextSourceIndex;

    uint8_t b;
    /* set up the local pointers */
    cnv=pArgs->converter;

    source=(const uint8_t *)pArgs->source;
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    target=pArgs->target;
    targetLimit=pArgs->targetLimit;
    offsets=pArgs->offsets;
    /* get the state machine state */
    {
        /* unpack: bit 24 = inDirectMode, bits 23..16 = base64Counter, bits 15..0 = bits */
        uint32_t status=cnv->toUnicodeStatus;
        inDirectMode=(UBool)((status>>24)&1);
        base64Counter=(int8_t)(status>>16);
        bits=(uint16_t)status;
    }
    bytes=cnv->toUBytes;
    byteIndex=cnv->toULength;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex=byteIndex==0 ? 0 : -1;
    nextSourceIndex=0;

    if(inDirectMode) {
directMode:
        /*
         * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
         * with their US-ASCII byte values.
         * Backslash and Tilde and most control characters are not allowed in UTF-7.
         * A plus sign starts Unicode (or "escape") Mode.
         *
         * In Direct Mode, only the sourceIndex is used.
         */
        byteIndex=0;
        /*
         * Each direct byte produces exactly one UChar, so the loop can run for
         * min(remaining source, remaining target) iterations without checking
         * either limit per byte.
         */
        length=(int32_t)(sourceLimit-source);
        targetCapacity=(int32_t)(targetLimit-target);
        if(length>targetCapacity) {
            length=targetCapacity;
        }
        while(length>0) {
            b=*source++;
            if(!isLegalUTF7(b)) {
                /* illegal */
                /* save the offending byte so it can be reported with the error */
                bytes[0]=b;
                byteIndex=1;
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                break;
            } else if(b!=PLUS) {
                /* write directly encoded character */
                *target++=b;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex++;
                }
            } else /* PLUS */ {
                /* switch to Unicode mode */
                nextSourceIndex=++sourceIndex;
                inDirectMode=FALSE;
                byteIndex=0;
                bits=0;
                base64Counter=-1;
                goto unicodeMode;
            }
            --length;
        }
        /* the loop may have stopped because of the capacity clamp rather than end of input */
        if(source<sourceLimit && target>=targetLimit) {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
    } else {
unicodeMode:
        /*
         * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
         * The base64 sequence ends with any character that is not in the base64 alphabet.
         * A terminating minus sign is consumed.
         *
         * In Unicode Mode, the sourceIndex has the index to the start of the current
         * base64 bytes, while nextSourceIndex is precisely parallel to source,
         * keeping the index to the following byte.
         * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
         */
        while(source<sourceLimit) {
            if(target<targetLimit) {
                bytes[byteIndex++]=b=*source++;
                ++nextSourceIndex;
                if(b>=126) {
                    /* illegal - test other illegal US-ASCII values by base64Value==-3 */
                    /* this check also keeps b inside the 128-entry fromBase64[] table */
                    inDirectMode=TRUE;
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                    break;
                } else if((base64Value=fromBase64[b])>=0) {
                    /* collect base64 bytes into UChars */
                    /*
                     * base64Counter is the position within a group of 8 base64
                     * digits (48 bits = 3 UChars); a UChar is completed on
                     * digits 2, 5 and 7 of the group.
                     */
                    switch(base64Counter) {
                    case -1: /* -1 is immediately after the + */
                    case 0:
                        bits=base64Value;
                        base64Counter=1;
                        break;
                    case 1:
                    case 3:
                    case 4:
                    case 6:
                        bits=(uint16_t)((bits<<6)|base64Value);
                        ++base64Counter;
                        break;
                    case 2:
                        /* 12 collected bits + top 4 bits of this digit; the low 2 bits start the next UChar */
                        *target++=(UChar)((bits<<4)|(base64Value>>2));
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            sourceIndex=nextSourceIndex-1;
                        }
                        bytes[0]=b; /* keep this byte in case an error occurs */
                        byteIndex=1;
                        bits=(uint16_t)(base64Value&3);
                        base64Counter=3;
                        break;
                    case 5:
                        /* 14 collected bits + top 2 bits of this digit; the low 4 bits start the next UChar */
                        *target++=(UChar)((bits<<2)|(base64Value>>4));
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            sourceIndex=nextSourceIndex-1;
                        }
                        bytes[0]=b; /* keep this byte in case an error occurs */
                        byteIndex=1;
                        bits=(uint16_t)(base64Value&15);
                        base64Counter=6;
                        break;
                    case 7:
                        /* 10 collected bits + all 6 bits of this digit; the group ends on a UChar boundary */
                        *target++=(UChar)((bits<<6)|base64Value);
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            sourceIndex=nextSourceIndex;
                        }
                        byteIndex=0;
                        bits=0;
                        base64Counter=0;
                        break;
                    default:
                        /* will never occur */
                        break;
                    }
                } else if(base64Value==-2) {
                    /* minus sign terminates the base64 sequence */
                    inDirectMode=TRUE;
                    if(base64Counter==-1) {
                        /* +- i.e. a minus immediately following a plus */
                        *target++=PLUS;
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex-1;
                        }
                    } else {
                        /* absorb the minus and leave the Unicode Mode */
                        if(bits!=0) {
                            /* bits are illegally left over, a UChar is incomplete */
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                            break;
                        }
                    }
                    sourceIndex=nextSourceIndex;
                    goto directMode;
                } else if(base64Value==-1) /* for any legal character except base64 and minus sign */ {
                    /* leave the Unicode Mode */
                    inDirectMode=TRUE;
                    if(base64Counter==-1) {
                        /* illegal: + immediately followed by something other than base64 or minus sign */
                        /* include the plus sign in the reported sequence */
                        --sourceIndex;
                        bytes[0]=PLUS;
                        bytes[1]=b;
                        byteIndex=2;
                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                        break;
                    } else if(bits==0) {
                        /* un-read the character in case it is a plus sign */
                        --source;
                        sourceIndex=nextSourceIndex-1;
                        goto directMode;
                    } else {
                        /* bits are illegally left over, a UChar is incomplete */
                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                        break;
                    }
                } else /* base64Value==-3 for illegal characters */ {
                    /* illegal */
                    inDirectMode=TRUE;
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                    break;
                }
            } else {
                /* target is full */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
        }
    }

    if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {
        /*
         * if we are in Unicode mode, then the byteIndex might not be 0,
         * but that is ok if bits==0
         * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
         * (not true for IMAP-mailbox-name where we must end in direct mode)
         */
        byteIndex=0;
    }

    /* set the converter state back into UConverter */
    /* base64Counter goes through uint8_t so that a value of -1 occupies only bits 23..16 */
    cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
    cnv->toULength=byteIndex;

    /* write back the updated pointers */
    pArgs->source=(const char *)source;
    pArgs->target=target;
    pArgs->offsets=offsets;
    return;
}
442
/*
 * Convert UTF-16 code units to UTF-7 bytes, optionally recording for each
 * output byte the index of the source code unit it encodes.
 *
 * pArgs      supplies the converter, source/sourceLimit, target/targetLimit,
 *            offsets (may be NULL) and the flush flag. On return, source,
 *            target and offsets have been advanced past what was consumed
 *            and produced.
 * pErrorCode set to U_BUFFER_OVERFLOW_ERROR when the target is full; output
 *            bytes of an already-consumed code unit that did not fit are
 *            parked in cnv->charErrorBuffer. Not modified otherwise.
 *
 * The encoder alternates between two states joined by goto:
 *   directMode   characters flagged in encodeDirectly[] are copied through
 *   unicodeMode  code units are emitted as base64 digits of UTF-16BE
 * State that must survive between calls is packed into cnv->fromUnicodeStatus
 * (version, inDirectMode, base64Counter, bits).
 */
static void
_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                            UErrorCode *pErrorCode) {
    UConverter *cnv;
    const UChar *source, *sourceLimit;
    uint8_t *target, *targetLimit;
    int32_t *offsets;

    int32_t length, targetCapacity, sourceIndex;
    UChar c;

    /* UTF-7 state */
    const UBool *encodeDirectly;
    uint8_t bits;
    int8_t base64Counter;
    UBool inDirectMode;

    /* get the converter */
    cnv=pArgs->converter;

    /* set up the local pointers */
    source=pArgs->source;
    sourceLimit=pArgs->sourceLimit;
    target=(uint8_t *)pArgs->target;
    targetLimit=(uint8_t *)pArgs->targetLimit;
    offsets=pArgs->offsets;

    /* get the state machine state */
    {
        uint32_t status=cnv->fromUnicodeStatus;
        /* version bits 31..28 all zero -> set O is encoded directly; otherwise set O is escaped */
        encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted;
        inDirectMode=(UBool)((status>>24)&1);
        base64Counter=(int8_t)(status>>16);
        bits=(uint8_t)status;
    }

    /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
    sourceIndex=0;

    if(inDirectMode) {
directMode:
        /*
         * A directly encoded character produces exactly one byte, so the loop
         * runs for min(remaining source, remaining target) iterations; the
         * two-byte "+-" case re-enters at directMode to recompute the bound.
         */
        length=(int32_t)(sourceLimit-source);
        targetCapacity=(int32_t)(targetLimit-target);
        if(length>targetCapacity) {
            length=targetCapacity;
        }
        while(length>0) {
            c=*source++;
            /* currently always encode CR LF SP TAB directly */
            if(c<=127 && encodeDirectly[c]) {
                /* encode directly */
                *target++=(uint8_t)c;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex++;
                }
            } else if(c==PLUS) {
                /* output +- for + */
                *target++=PLUS;
                if(target<targetLimit) {
                    *target++=MINUS;
                    if(offsets!=NULL) {
                        *offsets++=sourceIndex;
                        *offsets++=sourceIndex++;
                    }
                    /* realign length and targetCapacity */
                    goto directMode;
                } else {
                    if(offsets!=NULL) {
                        *offsets++=sourceIndex++;
                    }
                    /* no room for the '-': park it in the overflow buffer */
                    cnv->charErrorBuffer[0]=MINUS;
                    cnv->charErrorBufferLength=1;
                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                    break;
                }
            } else {
                /* un-read this character and switch to Unicode Mode */
                --source;
                *target++=PLUS;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex;
                }
                inDirectMode=FALSE;
                base64Counter=0;
                goto unicodeMode;
            }
            --length;
        }
        /* the loop may have stopped because of the capacity clamp rather than end of input */
        if(source<sourceLimit && target>=targetLimit) {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
    } else {
unicodeMode:
        while(source<sourceLimit) {
            if(target<targetLimit) {
                c=*source++;
                if(c<=127 && encodeDirectly[c]) {
                    /* encode directly */
                    inDirectMode=TRUE;

                    /* trick: back out this character to make this easier */
                    --source;

                    /* terminate the base64 sequence */
                    if(base64Counter!=0) {
                        /* write remaining bits for the previous character */
                        *target++=toBase64[bits];
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex-1;
                        }
                    }
                    /*
                     * fromBase64[c]==-1 means c itself ends a base64 run when decoded,
                     * so the explicit '-' is only written for other characters
                     * (base64 digits and '-' itself).
                     */
                    if(fromBase64[c]!=-1) {
                        /* need to terminate with a minus */
                        if(target<targetLimit) {
                            *target++=MINUS;
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex-1;
                            }
                        } else {
                            cnv->charErrorBuffer[0]=MINUS;
                            cnv->charErrorBufferLength=1;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                            break;
                        }
                    }
                    goto directMode;
                } else {
                    /*
                     * base64 this character:
                     * Output 2 or 3 base64 bytes for the remaining bits of the previous character
                     * and the bits of this character, each implicitly in UTF-16BE.
                     *
                     * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
                     * character to the next. The actual 2 or 4 bits are shifted to the left edge
                     * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
                     *
                     * In the overflow branches below the code unit has already been consumed:
                     * the bytes that did not fit go to cnv->charErrorBuffer, and the "break"
                     * only leaves the switch. The loop ends on its next iteration, either
                     * because the source is exhausted or via the "target is full" branch.
                     */
                    switch(base64Counter) {
                    case 0:
                        /* nothing carried over: emit bits 15..10 and 9..4, carry bits 3..0 */
                        /* NOTE(review): c>>10 stays within toBase64[] only if UChar is 16 bits wide -- confirm */
                        *target++=toBase64[c>>10];
                        if(target<targetLimit) {
                            *target++=toBase64[(c>>4)&0x3f];
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex;
                                *offsets++=sourceIndex++;
                            }
                        } else {
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex++;
                            }
                            cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f];
                            cnv->charErrorBufferLength=1;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                        }
                        bits=(uint8_t)((c&15)<<2);
                        base64Counter=1;
                        break;
                    case 1:
                        /* 4 bits carried: emit carry+bits 15..14, bits 13..8, bits 7..2; carry bits 1..0 */
                        *target++=toBase64[bits|(c>>14)];
                        if(target<targetLimit) {
                            *target++=toBase64[(c>>8)&0x3f];
                            if(target<targetLimit) {
                                *target++=toBase64[(c>>2)&0x3f];
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                            } else {
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                                cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f];
                                cnv->charErrorBufferLength=1;
                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                            }
                        } else {
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex++;
                            }
                            cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f];
                            cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f];
                            cnv->charErrorBufferLength=2;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                        }
                        bits=(uint8_t)((c&3)<<4);
                        base64Counter=2;
                        break;
                    case 2:
                        /* 2 bits carried: emit carry+bits 15..12, bits 11..6, bits 5..0; nothing left to carry */
                        *target++=toBase64[bits|(c>>12)];
                        if(target<targetLimit) {
                            *target++=toBase64[(c>>6)&0x3f];
                            if(target<targetLimit) {
                                *target++=toBase64[c&0x3f];
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                            } else {
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                                cnv->charErrorBuffer[0]=toBase64[c&0x3f];
                                cnv->charErrorBufferLength=1;
                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                            }
                        } else {
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex++;
                            }
                            cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f];
                            cnv->charErrorBuffer[1]=toBase64[c&0x3f];
                            cnv->charErrorBufferLength=2;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                        }
                        bits=0;
                        base64Counter=0;
                        break;
                    default:
                        /* will never occur */
                        break;
                    }
                }
            } else {
                /* target is full */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
        }
    }

    if(pArgs->flush && source>=sourceLimit) {
        /* flush remaining bits to the target */
        if(!inDirectMode && base64Counter!=0) {
            if(target<targetLimit) {
                *target++=toBase64[bits];
                if(offsets!=NULL) {
                    *offsets++=sourceIndex-1;
                }
            } else {
                /* append rather than store at index 0: an overflow branch above may already have parked bytes here */
                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            }
        }
        /* reset the state for the next conversion */
        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
    } else {
        /* set the converter state back into UConverter */
        cnv->fromUnicodeStatus=
            (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
            ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
    }

    /* write back the updated pointers */
    pArgs->source=source;
    pArgs->target=(char *)target;
    pArgs->offsets=offsets;
    return;
}
705
706static const char *
707_UTF7GetName(const UConverter *cnv) {
708    switch(cnv->fromUnicodeStatus>>28) {
709    case 1:
710        return "UTF-7,version=1";
711    default:
712        return "UTF-7";
713    }
714}
715
/*
 * Function table for the UTF-7 converter.
 * The initializer is positional; the slot meanings are defined by
 * UConverterImpl, which is declared outside this file.
 * NOTE(review): each conversion function is listed twice in a row --
 * presumably the plain and the "with offsets" slots share one
 * implementation; confirm against the UConverterImpl declaration.
 * NULL entries leave a slot unset.
 */
static const UConverterImpl _UTF7Impl={
    UCNV_UTF7,

    NULL,
    NULL,

    _UTF7Open,
    NULL,
    _UTF7Reset,

    _UTF7ToUnicodeWithOffsets,
    _UTF7ToUnicodeWithOffsets,
    _UTF7FromUnicodeWithOffsets,
    _UTF7FromUnicodeWithOffsets,
    NULL,

    NULL,
    _UTF7GetName,
    NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
    NULL,
    ucnv_getCompleteUnicodeSet
};
738
/*
 * Static description of the UTF-7 converter (name "UTF-7", type UCNV_UTF7).
 * The initializer is positional; field meanings are defined by
 * UConverterStaticData, which is declared outside this file.
 * NOTE(review): "1, 4" presumably gives the minimum and maximum number of
 * bytes per character, and "{ 0x3f, 0, 0, 0 }, 1" a one-byte '?' substitution
 * character -- confirm against the struct declaration.
 */
static const UConverterStaticData _UTF7StaticData={
    sizeof(UConverterStaticData),
    "UTF-7",
    0, /* TODO CCSID for UTF-7 */
    UCNV_IBM, UCNV_UTF7,
    1, 4,
    { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
    FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
751
/*
 * Shared-data record for UTF-7; the only symbol in this section with
 * external linkage. It ties together _UTF7StaticData and _UTF7Impl.
 * NOTE(review): the remaining positional fields (~0 after the size, the two
 * NULLs, FALSE, trailing 0) are defined by UConverterSharedData, which is
 * declared outside this file -- confirm their meaning there.
 */
const UConverterSharedData _UTF7Data={
    sizeof(UConverterSharedData), ~((uint32_t)0),
    NULL, NULL, &_UTF7StaticData, FALSE, &_UTF7Impl,
    0
};
757
758/* IMAP mailbox name encoding ----------------------------------------------- */
759
760/*
761 * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
762 * http://www.ietf.org/rfc/rfc2060.txt
763 *
764 * 5.1.3.  Mailbox International Naming Convention
765 *
766 * By convention, international mailbox names are specified using a
767 * modified version of the UTF-7 encoding described in [UTF-7].  The
768 * purpose of these modifications is to correct the following problems
769 * with UTF-7:
770 *
771 *    1) UTF-7 uses the "+" character for shifting; this conflicts with
772 *       the common use of "+" in mailbox names, in particular USENET
773 *       newsgroup names.
774 *
775 *    2) UTF-7's encoding is BASE64 which uses the "/" character; this
776 *       conflicts with the use of "/" as a popular hierarchy delimiter.
777 *
778 *    3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
779 *       the use of "\" as a popular hierarchy delimiter.
780 *
781 *    4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
782 *       the use of "~" in some servers as a home directory indicator.
783 *
 *    5) UTF-7 permits multiple alternate forms to represent the same
 *       string; in particular, printable US-ASCII characters can be
 *       represented in encoded form.
787 *
788 * In modified UTF-7, printable US-ASCII characters except for "&"
789 * represent themselves; that is, characters with octet values 0x20-0x25
790 * and 0x27-0x7e.  The character "&" (0x26) is represented by the two-
791 * octet sequence "&-".
792 *
793 * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
794 * Unicode 16-bit octets) are represented in modified BASE64, with a
795 * further modification from [UTF-7] that "," is used instead of "/".
796 * Modified BASE64 MUST NOT be used to represent any printing US-ASCII
797 * character which can represent itself.
798 *
799 * "&" is used to shift to modified BASE64 and "-" to shift back to US-
800 * ASCII.  All names start in US-ASCII, and MUST end in US-ASCII (that
801 * is, a name that ends with a Unicode 16-bit octet MUST end with a "-
802 * ").
803 *
804 * For example, here is a mailbox name which mixes English, Japanese,
805 * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
806 */
807
/*
 * Tests for US-ASCII characters belonging to character classes
 * defined in UTF-7.
 *
 * Set D (directly encoded characters) consists of the following
 * characters: the upper and lower case letters A through Z
 * and a through z, the 10 digits 0-9, and the following nine special
 * characters (note that "+" and "=" are omitted):
 *     '(),-./:?
 *
 * Set O (optional direct characters) consists of the following
 * characters (note that "\" and "~" are omitted):
 *     !"#$%&*;<=>@[]^_`{|}
 *
 * According to the rules in RFC 2152, the byte values for the following
 * US-ASCII characters are not used in UTF-7 and are therefore illegal:
 * - all C0 control codes except for CR LF TAB
 * - BACKSLASH
 * - TILDE
 * - DEL
 * - all codes beyond US-ASCII, i.e. all >127
 *
 * Note: the sets and the list of illegal bytes above describe plain UTF-7
 * (RFC 2152). The IMAP mailbox name variant implemented below uses
 * different rules: isLegalIMAP() accepts every byte 0x20..0x7e (which
 * includes BACKSLASH and TILDE) and rejects everything else (which includes
 * CR LF TAB), and inSetDIMAP() direct-encodes all legal bytes except '&'.
 */
830
/*
 * Character constants and classification macros for the IMAP mailbox name
 * variant of UTF-7.
 *
 * Note: the function-like macros below may evaluate their argument more than
 * once; do not pass expressions with side effects.
 */

/* uses '&' not '+' to start a base64 sequence */
#define AMPERSAND 0x26
#define COMMA 0x2c
#define SLASH 0x2f

/* legal byte values: all US-ASCII graphic characters 0x20..0x7e */
#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)

/*
 * direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26
 * (the argument is parenthesized so that expression arguments such as a|b
 * are compared as a whole against AMPERSAND)
 */
#define inSetDIMAP(c) (isLegalIMAP(c) && (c)!=AMPERSAND)

/* modified base64: the value 63 is written as ',' instead of '/' */
#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA)
/* modified base64: ',' decodes to 63 and '/' is rejected with -1 */
#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[c])
844
845/*
846 * converter status values:
847 *
848 * toUnicodeStatus:
849 *     24 inDirectMode (boolean)
850 * 23..16 base64Counter (-1..7)
851 * 15..0  bits (up to 14 bits incoming base64)
852 *
853 * fromUnicodeStatus:
854 *     24 inDirectMode (boolean)
855 * 23..16 base64Counter (0..2)
856 *  7..0  bits (6 bits outgoing base64)
857 *
858 * ignore bits 31..25
859 */
860
/*
 * Converts IMAP mailbox name bytes (modified UTF-7) to UTF-16.
 *
 * Reads bytes from pArgs->source up to pArgs->sourceLimit and writes UChars
 * to pArgs->target up to pArgs->targetLimit. If pArgs->offsets is not NULL,
 * one source index is written there for each UChar written.
 * On return, pArgs->source, pArgs->target and pArgs->offsets are advanced.
 *
 * The decoder state (inDirectMode, base64Counter, bits) is loaded from and
 * stored back into cnv->toUnicodeStatus; the bytes of the sequence currently
 * being collected are kept in cnv->toUBytes with their count in
 * cnv->toULength.
 *
 * *pErrorCode is set to
 * - U_ILLEGAL_CHAR_FOUND for a byte outside 0x20..0x7e, for a byte in
 *   Unicode mode that is neither base64 nor a minus sign, for base64 that
 *   decodes to a UChar in 0x20..0x7e, or for a minus sign that ends a
 *   base64 sequence with bits left over;
 * - U_BUFFER_OVERFLOW_ERROR when input remains but the target is full;
 * - U_TRUNCATED_CHAR_FOUND when pArgs->flush is set, all input has been
 *   consumed without error, and the converter is still in Unicode mode
 *   with no pending bytes.
 */
static void
_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                          UErrorCode *pErrorCode) {
    UConverter *cnv;
    const uint8_t *source, *sourceLimit;
    UChar *target;
    const UChar *targetLimit;
    int32_t *offsets;

    uint8_t *bytes;
    uint8_t byteIndex;

    int32_t length, targetCapacity;

    /* UTF-7 state */
    uint16_t bits;
    int8_t base64Counter;
    UBool inDirectMode;

    int8_t base64Value;

    int32_t sourceIndex, nextSourceIndex;

    UChar c;
    uint8_t b;

    /* set up the local pointers */
    cnv=pArgs->converter;

    source=(const uint8_t *)pArgs->source;
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    target=pArgs->target;
    targetLimit=pArgs->targetLimit;
    offsets=pArgs->offsets;
    /* get the state machine state */
    {
        /* bit 24: inDirectMode; bits 23..16: base64Counter; bits 15..0: bits */
        uint32_t status=cnv->toUnicodeStatus;
        inDirectMode=(UBool)((status>>24)&1);
        base64Counter=(int8_t)(status>>16);
        bits=(uint16_t)status;
    }
    bytes=cnv->toUBytes;
    byteIndex=cnv->toULength;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex=byteIndex==0 ? 0 : -1;
    nextSourceIndex=0;

    if(inDirectMode) {
directMode:
        /*
         * In Direct Mode, US-ASCII characters are encoded directly, i.e.,
         * with their US-ASCII byte values.
         * An ampersand starts Unicode (or "escape") Mode.
         *
         * In Direct Mode, only the sourceIndex is used.
         */
        byteIndex=0;
        /*
         * Each directly encoded byte yields exactly one UChar, so the loop
         * may run for min(remaining input, remaining output) iterations
         * without re-checking the target limit.
         */
        length=(int32_t)(sourceLimit-source);
        targetCapacity=(int32_t)(targetLimit-target);
        if(length>targetCapacity) {
            length=targetCapacity;
        }
        while(length>0) {
            b=*source++;
            if(!isLegalIMAP(b)) {
                /* illegal */
                /* report the offending byte via the converter's byte buffer */
                bytes[0]=b;
                byteIndex=1;
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                break;
            } else if(b!=AMPERSAND) {
                /* write directly encoded character */
                *target++=b;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex++;
                }
            } else /* AMPERSAND */ {
                /* switch to Unicode mode */
                /* both indexes now refer to the byte following the ampersand */
                nextSourceIndex=++sourceIndex;
                inDirectMode=FALSE;
                byteIndex=0;
                bits=0;
                /* -1 marks "immediately after the ampersand" */
                base64Counter=-1;
                goto unicodeMode;
            }
            --length;
        }
        if(source<sourceLimit && target>=targetLimit) {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
    } else {
unicodeMode:
        /*
         * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
         * The base64 sequence ends with any character that is not in the base64 alphabet.
         * A terminating minus sign is consumed.
         * US-ASCII must not be base64-ed.
         *
         * In Unicode Mode, the sourceIndex has the index to the start of the current
         * base64 bytes, while nextSourceIndex is precisely parallel to source,
         * keeping the index to the following byte.
         * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
         */
        while(source<sourceLimit) {
            if(target<targetLimit) {
                /* remember every consumed byte so that an error can report the whole sequence */
                bytes[byteIndex++]=b=*source++;
                ++nextSourceIndex;
                if(b>0x7e) {
                    /* illegal - test other illegal US-ASCII values by base64Value==-3 */
                    inDirectMode=TRUE;
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                    break;
                } else if((base64Value=FROM_BASE64_IMAP(b))>=0) {
                    /* collect base64 bytes into UChars */
                    /*
                     * base64Counter counts the base64 bytes seen within a group of 8
                     * (8 bytes * 6 bits = 3 UChars * 16 bits); a UChar is completed
                     * at counter values 2, 5 and 7.
                     */
                    switch(base64Counter) {
                    case -1: /* -1 is immediately after the & */
                    case 0:
                        bits=base64Value;
                        base64Counter=1;
                        break;
                    case 1:
                    case 3:
                    case 4:
                    case 6:
                        bits=(uint16_t)((bits<<6)|base64Value);
                        ++base64Counter;
                        break;
                    case 2:
                        /* 12 collected bits plus the top 4 bits of this byte */
                        c=(UChar)((bits<<4)|(base64Value>>2));
                        if(isLegalIMAP(c)) {
                            /* illegal */
                            /* printable US-ASCII must not be base64-encoded */
                            inDirectMode=TRUE;
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                            goto endloop;
                        }
                        *target++=c;
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            /* the next UChar starts within the current byte */
                            sourceIndex=nextSourceIndex-1;
                        }
                        bytes[0]=b; /* keep this byte in case an error occurs */
                        byteIndex=1;
                        /* the low 2 bits of this byte belong to the next UChar */
                        bits=(uint16_t)(base64Value&3);
                        base64Counter=3;
                        break;
                    case 5:
                        /* 14 collected bits plus the top 2 bits of this byte */
                        c=(UChar)((bits<<2)|(base64Value>>4));
                        if(isLegalIMAP(c)) {
                            /* illegal */
                            /* printable US-ASCII must not be base64-encoded */
                            inDirectMode=TRUE;
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                            goto endloop;
                        }
                        *target++=c;
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            /* the next UChar starts within the current byte */
                            sourceIndex=nextSourceIndex-1;
                        }
                        bytes[0]=b; /* keep this byte in case an error occurs */
                        byteIndex=1;
                        /* the low 4 bits of this byte belong to the next UChar */
                        bits=(uint16_t)(base64Value&15);
                        base64Counter=6;
                        break;
                    case 7:
                        /* 10 collected bits plus all 6 bits of this byte */
                        c=(UChar)((bits<<6)|base64Value);
                        if(isLegalIMAP(c)) {
                            /* illegal */
                            /* printable US-ASCII must not be base64-encoded */
                            inDirectMode=TRUE;
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                            goto endloop;
                        }
                        *target++=c;
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            /* the UChar ended on a byte boundary; the next one starts at the next byte */
                            sourceIndex=nextSourceIndex;
                        }
                        byteIndex=0;
                        bits=0;
                        base64Counter=0;
                        break;
                    default:
                        /* will never occur */
                        break;
                    }
                } else if(base64Value==-2) {
                    /* minus sign terminates the base64 sequence */
                    inDirectMode=TRUE;
                    if(base64Counter==-1) {
                        /* &- i.e. a minus immediately following an ampersand */
                        *target++=AMPERSAND;
                        if(offsets!=NULL) {
                            /* sourceIndex points past the ampersand; report the ampersand itself */
                            *offsets++=sourceIndex-1;
                        }
                    } else {
                        /* absorb the minus and leave the Unicode Mode */
                        if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
                            /* bits are illegally left over, a UChar is incomplete */
                            /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                            break;
                        }
                    }
                    sourceIndex=nextSourceIndex;
                    goto directMode;
                } else {
                    if(base64Counter==-1) {
                        /* illegal: & immediately followed by something other than base64 or minus sign */
                        /* include the ampersand in the reported sequence */
                        --sourceIndex;
                        bytes[0]=AMPERSAND;
                        bytes[1]=b;
                        byteIndex=2;
                    }
                    /* base64Value==-1 for characters that are illegal only in Unicode mode */
                    /* base64Value==-3 for illegal characters */
                    /* illegal */
                    inDirectMode=TRUE;
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                    break;
                }
            } else {
                /* target is full */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
        }
    }
endloop:

    /*
     * the end of the input stream and detection of truncated input
     * are handled by the framework, but here we must check if we are in Unicode
     * mode and byteIndex==0 because we must end in direct mode
     *
     * conditions:
     *   successful
     *   in Unicode mode and byteIndex==0
     *   end of input and no truncated input
     */
    if( U_SUCCESS(*pErrorCode) &&
        !inDirectMode && byteIndex==0 &&
        pArgs->flush && source>=sourceLimit
    ) {
        if(base64Counter==-1) {
            /* & at the very end of the input */
            /* make the ampersand the reported sequence */
            bytes[0]=AMPERSAND;
            byteIndex=1;
        }
        /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */

        inDirectMode=TRUE; /* avoid looping */
        *pErrorCode=U_TRUNCATED_CHAR_FOUND;
    }

    /* set the converter state back into UConverter */
    /* base64Counter goes through uint8_t so that -1 does not spill into bits 24 and above */
    cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
    cnv->toULength=byteIndex;

    /* write back the updated pointers */
    pArgs->source=(const char *)source;
    pArgs->target=target;
    pArgs->offsets=offsets;
    return;
}
1128
/*
 * Converts UTF-16 to IMAP mailbox name bytes (modified UTF-7).
 *
 * Reads UChars from pArgs->source up to pArgs->sourceLimit and writes bytes
 * to pArgs->target up to pArgs->targetLimit. If pArgs->offsets is not NULL,
 * one source index is written there for each byte written to the target.
 * On return, pArgs->source, pArgs->target and pArgs->offsets are advanced.
 *
 * Code units 0x20..0x7e other than '&' are copied directly, '&' is written
 * as '&' followed by a minus sign, and every other code unit is written in
 * modified base64 between an '&' and a terminating minus sign.
 *
 * The encoder state (inDirectMode, base64Counter, bits) is loaded from and
 * stored back into cnv->fromUnicodeStatus. When pArgs->flush is set and all
 * input has been consumed, an open base64 sequence is terminated and the
 * state is reset to direct mode.
 *
 * When the output for a code unit does not fit completely, the bytes that
 * do not fit are stored in cnv->charErrorBuffer and *pErrorCode is set to
 * U_BUFFER_OVERFLOW_ERROR; the same error is set when input remains but the
 * target is full.
 */
static void
_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                            UErrorCode *pErrorCode) {
    UConverter *cnv;
    const UChar *source, *sourceLimit;
    uint8_t *target, *targetLimit;
    int32_t *offsets;

    int32_t length, targetCapacity, sourceIndex;
    UChar c;
    uint8_t b;

    /* UTF-7 state */
    uint8_t bits;
    int8_t base64Counter;
    UBool inDirectMode;

    /* set up the local pointers */
    cnv=pArgs->converter;

    /* set up the local pointers */
    source=pArgs->source;
    sourceLimit=pArgs->sourceLimit;
    target=(uint8_t *)pArgs->target;
    targetLimit=(uint8_t *)pArgs->targetLimit;
    offsets=pArgs->offsets;

    /* get the state machine state */
    {
        /* bit 24: inDirectMode; bits 23..16: base64Counter; bits 7..0: bits */
        uint32_t status=cnv->fromUnicodeStatus;
        inDirectMode=(UBool)((status>>24)&1);
        base64Counter=(int8_t)(status>>16);
        bits=(uint8_t)status;
    }

    /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
    sourceIndex=0;

    if(inDirectMode) {
directMode:
        /*
         * Every iteration writes at least one byte, so the loop is limited to
         * min(remaining input, remaining output) iterations; this guarantees
         * room for the first byte written per code unit.
         */
        length=(int32_t)(sourceLimit-source);
        targetCapacity=(int32_t)(targetLimit-target);
        if(length>targetCapacity) {
            length=targetCapacity;
        }
        while(length>0) {
            c=*source++;
            /* encode 0x20..0x7e except '&' directly */
            if(inSetDIMAP(c)) {
                /* encode directly */
                *target++=(uint8_t)c;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex++;
                }
            } else if(c==AMPERSAND) {
                /* output &- for & */
                *target++=AMPERSAND;
                if(target<targetLimit) {
                    *target++=MINUS;
                    if(offsets!=NULL) {
                        *offsets++=sourceIndex;
                        *offsets++=sourceIndex++;
                    }
                    /* realign length and targetCapacity */
                    /* (two bytes were written for one code unit, so the precomputed length is stale) */
                    goto directMode;
                } else {
                    if(offsets!=NULL) {
                        *offsets++=sourceIndex++;
                    }
                    /* no room for the minus: hand it over via the error buffer */
                    cnv->charErrorBuffer[0]=MINUS;
                    cnv->charErrorBufferLength=1;
                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                    break;
                }
            } else {
                /* un-read this character and switch to Unicode Mode */
                --source;
                *target++=AMPERSAND;
                if(offsets!=NULL) {
                    /* sourceIndex is not advanced: the code unit itself is still to be encoded */
                    *offsets++=sourceIndex;
                }
                inDirectMode=FALSE;
                base64Counter=0;
                goto unicodeMode;
            }
            --length;
        }
        if(source<sourceLimit && target>=targetLimit) {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
    } else {
unicodeMode:
        while(source<sourceLimit) {
            if(target<targetLimit) {
                c=*source++;
                if(isLegalIMAP(c)) {
                    /* encode directly */
                    inDirectMode=TRUE;

                    /* trick: back out this character to make this easier */
                    --source;

                    /* terminate the base64 sequence */
                    if(base64Counter!=0) {
                        /* write remaining bits for the previous character */
                        *target++=TO_BASE64_IMAP(bits);
                        if(offsets!=NULL) {
                            /* these bits belong to the previous code unit */
                            *offsets++=sourceIndex-1;
                        }
                    }
                    /* need to terminate with a minus */
                    if(target<targetLimit) {
                        *target++=MINUS;
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex-1;
                        }
                    } else {
                        cnv->charErrorBuffer[0]=MINUS;
                        cnv->charErrorBufferLength=1;
                        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                        break;
                    }
                    goto directMode;
                } else {
                    /*
                     * base64 this character:
                     * Output 2 or 3 base64 bytes for the remaining bits of the previous character
                     * and the bits of this character, each implicitly in UTF-16BE.
                     *
                     * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
                     * character to the next. The actual 2 or 4 bits are shifted to the left edge
                     * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
                     */
                    /*
                     * In each case the first byte is known to fit (target<targetLimit was
                     * checked above); bytes that do not fit go into cnv->charErrorBuffer.
                     */
                    switch(base64Counter) {
                    case 0:
                        /* no bits pending: write bits 15..10 and 9..4, keep bits 3..0 */
                        b=(uint8_t)(c>>10);
                        *target++=TO_BASE64_IMAP(b);
                        if(target<targetLimit) {
                            b=(uint8_t)((c>>4)&0x3f);
                            *target++=TO_BASE64_IMAP(b);
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex;
                                *offsets++=sourceIndex++;
                            }
                        } else {
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex++;
                            }
                            b=(uint8_t)((c>>4)&0x3f);
                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
                            cnv->charErrorBufferLength=1;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                        }
                        bits=(uint8_t)((c&15)<<2);
                        base64Counter=1;
                        break;
                    case 1:
                        /* 4 bits pending: write pending+bits 15..14, then 13..8 and 7..2, keep bits 1..0 */
                        b=(uint8_t)(bits|(c>>14));
                        *target++=TO_BASE64_IMAP(b);
                        if(target<targetLimit) {
                            b=(uint8_t)((c>>8)&0x3f);
                            *target++=TO_BASE64_IMAP(b);
                            if(target<targetLimit) {
                                b=(uint8_t)((c>>2)&0x3f);
                                *target++=TO_BASE64_IMAP(b);
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                            } else {
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                                b=(uint8_t)((c>>2)&0x3f);
                                cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
                                cnv->charErrorBufferLength=1;
                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                            }
                        } else {
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex++;
                            }
                            b=(uint8_t)((c>>8)&0x3f);
                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
                            b=(uint8_t)((c>>2)&0x3f);
                            cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
                            cnv->charErrorBufferLength=2;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                        }
                        bits=(uint8_t)((c&3)<<4);
                        base64Counter=2;
                        break;
                    case 2:
                        /* 2 bits pending: write pending+bits 15..12, then 11..6 and 5..0, nothing left over */
                        b=(uint8_t)(bits|(c>>12));
                        *target++=TO_BASE64_IMAP(b);
                        if(target<targetLimit) {
                            b=(uint8_t)((c>>6)&0x3f);
                            *target++=TO_BASE64_IMAP(b);
                            if(target<targetLimit) {
                                b=(uint8_t)(c&0x3f);
                                *target++=TO_BASE64_IMAP(b);
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                            } else {
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                                b=(uint8_t)(c&0x3f);
                                cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
                                cnv->charErrorBufferLength=1;
                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                            }
                        } else {
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex++;
                            }
                            b=(uint8_t)((c>>6)&0x3f);
                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
                            b=(uint8_t)(c&0x3f);
                            cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
                            cnv->charErrorBufferLength=2;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                        }
                        bits=0;
                        base64Counter=0;
                        break;
                    default:
                        /* will never occur */
                        break;
                    }
                }
            } else {
                /* target is full */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
        }
    }

    if(pArgs->flush && source>=sourceLimit) {
        /* flush remaining bits to the target */
        if(!inDirectMode) {
            if(base64Counter!=0) {
                if(target<targetLimit) {
                    *target++=TO_BASE64_IMAP(bits);
                    if(offsets!=NULL) {
                        *offsets++=sourceIndex-1;
                    }
                } else {
                    /* append after any bytes that the loop above already put into the error buffer */
                    cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits);
                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                }
            }
            /* need to terminate with a minus */
            if(target<targetLimit) {
                *target++=MINUS;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex-1;
                }
            } else {
                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            }
        }
        /* reset the state for the next conversion */
        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
    } else {
        /* set the converter state back into UConverter */
        cnv->fromUnicodeStatus=
            (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
            ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
    }

    /* write back the updated pointers */
    pArgs->source=source;
    pArgs->target=(char *)target;
    pArgs->offsets=offsets;
    return;
}
1415
/*
 * Implementation table for the IMAP mailbox name converter.
 *
 * Open and reset reuse the UTF-7 functions (_UTF7Open, _UTF7Reset); the
 * conversion functions are the IMAP-specific ones, with the same
 * "WithOffsets" function installed in both adjacent slots for each direction.
 * NOTE(review): the meaning of each positional slot is defined by the
 * UConverterImpl declaration, which is not visible here - confirm there.
 */
static const UConverterImpl _IMAPImpl={
    UCNV_IMAP_MAILBOX,

    NULL,
    NULL,

    _UTF7Open,
    NULL,
    _UTF7Reset,

    _IMAPToUnicodeWithOffsets,
    _IMAPToUnicodeWithOffsets,
    _IMAPFromUnicodeWithOffsets,
    _IMAPFromUnicodeWithOffsets,
    NULL,

    NULL,
    NULL,
    NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
    NULL,
    ucnv_getCompleteUnicodeSet
};
1438
/*
 * Static description of the converter named "IMAP-mailbox-name".
 * NOTE(review): the meaning of each positional field is defined by the
 * UConverterStaticData declaration, which is not visible here; "1, 4" is
 * presumably the minimum/maximum number of bytes per character - confirm.
 */
static const UConverterStaticData _IMAPStaticData={
    sizeof(UConverterStaticData),
    "IMAP-mailbox-name",
    0, /* TODO CCSID for IMAP-mailbox-name */
    UCNV_IBM, UCNV_IMAP_MAILBOX,
    1, 4,
    { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
    FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
1451
/*
 * Shared data object for the IMAP mailbox name converter; it ties together
 * the static description (_IMAPStaticData) and the implementation table
 * (_IMAPImpl). This is the only symbol of the IMAP converter with external
 * linkage.
 */
const UConverterSharedData _IMAPData={
    sizeof(UConverterSharedData), ~((uint32_t)0),
    NULL, NULL, &_IMAPStaticData, FALSE, &_IMAPImpl,
    0
};
1457
1458#endif
1459