gsm.c revision 55f4e4a5ec657a017e3bf75299ad71fd1c968dd3
1/* Copyright (C) 2007-2008 The Android Open Source Project
2**
3** This software is licensed under the terms of the GNU General Public
4** License version 2, as published by the Free Software Foundation, and
5** may be copied, distributed, and modified under those terms.
6**
7** This program is distributed in the hope that it will be useful,
8** but WITHOUT ANY WARRANTY; without even the implied warranty of
9** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10** GNU General Public License for more details.
11*/
12#include "gsm.h"
13#include <stdlib.h>
14#include <string.h>
15
16/** UTILITIES
17 **/
18byte_t
19gsm_int_to_bcdi( int  value )
20{
21    return (byte_t)((value / 10) | ((value % 10) << 4));
22}
23
24int
25gsm_int_from_bcdi( byte_t  val )
26{
27    int  ret = 0;
28
29    if ((val & 0xf0) <= 0x90)
30        ret = (val >> 4);
31
32    if ((val & 0x0f) <= 0x90)
33        ret |= (val % 0xf)*10;
34
35    return ret;
36}
37
38
39static int
40gsm_bcdi_to_ascii( cbytes_t  bcd, int  bcdlen, bytes_t  dst )
41{
42    static byte_t  bcdichars[14] = "0123456789*#,N";
43
44    int  result = 0;
45    int  shift  = 0;
46
47    while (bcdlen > 0) {
48        int  c = (bcd[0] >> shift) & 0xf;
49
50        if (c == 0xf && bcdlen == 1)
51            break;
52
53        if (c < 14) {
54            if (dst) dst[result] = bcdichars[c];
55            result += 1;
56        }
57        bcdlen --;
58        shift += 4;
59        if (shift == 8) {
60            bcd++;
61            shift = 0;
62        }
63    }
64    return result;
65}
66
67
68static int
69gsm_bcdi_from_ascii( cbytes_t  ascii, int  asciilen, bytes_t  dst )
70{
71    cbytes_t  end    = ascii + asciilen;
72    int       result = 0;
73    int       phase  = 0x01;
74
75    while (ascii < end) {
76        int  c = *ascii++;
77
78        if (c == '*')
79            c = 11;
80        else if (c == '#')
81            c = 12;
82        else if (c == ',')
83            c = 13;
84        else if (c == 'N')
85            c = 14;
86        else {
87            c -= '0';
88            if ((unsigned)c >= 10)
89                break;
90        }
91        phase = (phase << 4) | c;
92        if (phase & 0x100) {
93            if (dst) dst[result] = (byte_t) phase;
94            result += 1;
95            phase   = 0x01;
96        }
97    }
98    if (phase != 0x01) {
99        if (dst) dst[result] = (byte_t)( phase | 0xf0 );
100        result += 1;
101    }
102    return  result;
103}
104
105
/* Return the value (0..15) of a single hexadecimal digit character,
 * accepting both lower and upper case, or -1 if 'c' is not a hex digit.
 */
int
gsm_hexchar_to_int( char  c )
{
    if (c >= '0' && c <= '9')
        return c - '0';

    if (c >= 'a' && c <= 'f')
        return (c - 'a') + 10;

    if (c >= 'A' && c <= 'F')
        return (c - 'A') + 10;

    return -1;
}
117
/* Same as gsm_hexchar_to_int(), except that a character which is not a
 * hexadecimal digit yields 0 instead of -1.
 */
int
gsm_hexchar_to_int0( char  c )
{
    int  digit = gsm_hexchar_to_int(c);

    if (digit < 0)
        digit = 0;

    return digit;
}
125
/* Parse the two hexadecimal characters hex[0] and hex[1] into a value
 * in the range 0..255. Returns -1 if either character is not a hex digit.
 */
int
gsm_hex2_to_byte( const char*  hex )
{
    int  high = gsm_hexchar_to_int(hex[0]);
    int  low  = gsm_hexchar_to_int(hex[1]);

    if (high >= 0 && low >= 0)
        return (high << 4) | low;

    return -1;
}
137
/* Parse the four hexadecimal characters hex[0..3] into a 16-bit value,
 * most significant byte first. Returns -1 if any character is invalid.
 */
int
gsm_hex4_to_short( const char*  hex )
{
    int  msb = gsm_hex2_to_byte(hex);
    int  lsb = gsm_hex2_to_byte(hex+2);

    if (msb >= 0 && lsb >= 0)
        return (msb << 8) | lsb;

    return -1;
}
149
150int
151gsm_hex2_to_byte0( const char*  hex )
152{
153    int  hi = gsm_hexchar_to_int0(hex[0]);
154    int  lo = gsm_hexchar_to_int0(hex[1]);
155
156    return (byte_t)( (hi << 4) | lo );
157}
158
/* Write the low 8 bits of 'val' as two lower-case hexadecimal characters
 * into hex[0] and hex[1]. No terminating zero is written.
 */
void
gsm_hex_from_byte( char*  hex, int val )
{
    const char*  digits = "0123456789abcdef";
    int          high   = (val >> 4) & 0xf;
    int          low    = val & 0xf;

    hex[0] = digits[high];
    hex[1] = digits[low];
}
167
/* Write a 16-bit value as four hexadecimal characters into hex[0..3],
 * most significant byte first, using gsm_hex_from_byte() for each byte.
 */
void
gsm_hex_from_short( char*  hex, int  val )
{
    int  msb = val >> 8;

    gsm_hex_from_byte( hex, msb );
    gsm_hex_from_byte( hex + 2, val );
}
174
175
176
177/** HEX
178 **/
179void
180gsm_hex_to_bytes0( cbytes_t  hex, int  hexlen, bytes_t  dst )
181{
182    int  nn;
183
184    for (nn = 0; nn < hexlen/2; nn++ ) {
185        dst[nn] = (byte_t) gsm_hex2_to_byte0( (const char*)hex+2*nn );
186    }
187    if (hexlen & 1) {
188        dst[nn] = gsm_hexchar_to_int0( hex[2*nn] ) << 4;
189    }
190}
191
192int
193gsm_hex_to_bytes( cbytes_t  hex, int  hexlen, bytes_t  dst )
194{
195    int  nn;
196
197    if (hexlen & 1)  /* must be even */
198        return -1;
199
200    for (nn = 0; nn < hexlen/2; nn++ ) {
201        int  c = gsm_hex2_to_byte( (const char*)hex+2*nn );
202        if (c < 0) return -1;
203        dst[nn] = (byte_t) c;
204    }
205    return hexlen/2;
206}
207
208void
209gsm_hex_from_bytes( char*  hex, cbytes_t  src, int  srclen )
210{
211    int  nn;
212
213    for (nn = 0; nn < srclen; nn++) {
214        gsm_hex_from_byte( hex + 2*nn, src[nn] );
215    }
216}
217
218/** ROPES
219 **/
220
221void
222gsm_rope_init( GsmRope  rope )
223{
224    rope->data  = NULL;
225    rope->pos   = 0;
226    rope->max   = 0;
227    rope->error = 0;
228}
229
230void
231gsm_rope_init_alloc( GsmRope  rope, int  count )
232{
233    rope->data  = rope->data0;
234    rope->pos   = 0;
235    rope->max   = sizeof(rope->data0);
236    rope->error = 0;
237
238    if (count > 0) {
239        rope->data = calloc( count, 1 );
240        rope->max  = count;
241
242        if (rope->data == NULL) {
243            rope->error = 1;
244            rope->max   = 0;
245        }
246    }
247}
248
249int
250gsm_rope_done( GsmRope  rope )
251{
252    int  result = rope->error;
253
254    if (rope->data && rope->data != rope->data0)
255        free(rope->data);
256
257    rope->data  = NULL;
258    rope->pos   = 0;
259    rope->max   = 0;
260    rope->error = 0;
261
262    return result;
263}
264
265
266bytes_t
267gsm_rope_done_acquire( GsmRope  rope, int  *psize )
268{
269    bytes_t  result = rope->data;
270
271    *psize = rope->pos;
272    if (result == rope->data0) {
273        result = malloc(  rope->pos );
274        if (result != NULL)
275            memcpy( result, rope->data, rope->pos );
276    }
277    return result;
278}
279
280
281int
282gsm_rope_ensure( GsmRope  rope, int  new_count )
283{
284    if (rope->data != NULL) {
285        int       old_max  = rope->max;
286        bytes_t   old_data = rope->data == rope->data0 ? NULL : rope->data;
287        int       new_max  = old_max;
288        bytes_t   new_data;
289
290        while (new_max < new_count) {
291            new_max += (new_max >> 1) + 4;
292        }
293        new_data = realloc( old_data, new_max );
294        if (new_data == NULL) {
295            rope->error = 1;
296            return -1;
297        }
298        rope->data = new_data;
299        rope->max  = new_max;
300    } else {
301        rope->max = new_count;
302    }
303    return 0;
304}
305
306static int
307gsm_rope_can_grow( GsmRope  rope, int  count )
308{
309    if (!rope->data || rope->error)
310        return 0;
311
312    if (rope->pos + count > rope->max)
313    {
314        if (rope->data == NULL)
315            rope->max = rope->pos + count;
316
317        else if (rope->error ||
318                 gsm_rope_ensure( rope, rope->pos + count ) < 0)
319            return 0;
320    }
321    return 1;
322}
323
324void
325gsm_rope_add_c( GsmRope  rope,  char  c )
326{
327    if (gsm_rope_can_grow(rope, 1)) {
328        rope->data[ rope->pos ] = (byte_t) c;
329    }
330    rope->pos += 1;
331}
332
333void
334gsm_rope_add( GsmRope  rope, const void*  buf, int  buflen )
335{
336    if (gsm_rope_can_grow(rope, buflen)) {
337        memcpy( rope->data + rope->pos, (const char*)buf, buflen );
338    }
339    rope->pos += buflen;
340}
341
342void*
343gsm_rope_reserve( GsmRope  rope, int  count )
344{
345    void*  result = NULL;
346
347    if (gsm_rope_can_grow(rope, count))
348    {
349        if (rope->data != NULL)
350            result = rope->data + rope->pos;
351    }
352    rope->pos += count;
353
354    return result;
355}
356
357/* skip a given number of Unicode characters in a utf-8 byte string */
358cbytes_t
359utf8_skip( cbytes_t   utf8,
360           cbytes_t   utf8end,
361           int        count)
362{
363    cbytes_t  p   = utf8;
364    cbytes_t  end = utf8end;
365
366    for ( ; count > 0; count-- ) {
367        int  c;
368
369        if (p >= end)
370            break;
371
372        c = *p++;
373        if (c > 128) {
374            while (p < end && (p[0] & 0xc0) == 0x80)
375                p++;
376        }
377    }
378    return  p;
379}
380
381
382static __inline__ int
383utf8_next( cbytes_t  *pp, cbytes_t  end )
384{
385    cbytes_t  p      = *pp;
386    int       result = -1;
387
388    if (p < end) {
389        int  c= *p++;
390        if (c >= 128) {
391            if ((c & 0xe0) == 0xc0)
392                c &= 0x1f;
393            else if ((c & 0xf0) == 0xe0)
394                c &= 0x0f;
395            else
396                c &= 0x07;
397
398            while (p < end && (p[0] & 0xc0) == 0x80) {
399                c = (c << 6) | (p[0] & 0x3f);
400                p ++;
401            }
402        }
403        result = c;
404        *pp    = p;
405    }
406    return result;
407}
408
409
410__inline__ int
411utf8_write( bytes_t  utf8, int  offset, int  v )
412{
413    int  result;
414
415    if (v < 128) {
416        result = 1;
417        if (utf8)
418            utf8[offset] = (byte_t) v;
419    } else if (v < 0x800) {
420        result = 2;
421        if (utf8) {
422            utf8[offset+0] = (byte_t)( 0xc0 | (v >> 6) );
423            utf8[offset+1] = (byte_t)( 0x80 | (v & 0x3f) );
424        }
425    } else if (v < 0x10000) {
426        result = 3;
427        if (utf8) {
428            utf8[offset+0] = (byte_t)( 0xe0 |  (v >> 12) );
429            utf8[offset+1] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) );
430            utf8[offset+2] = (byte_t)( 0x80 |  (v & 0x3f) );
431        }
432    } else {
433        result = 4;
434        if (utf8) {
435            utf8[offset+0] = (byte_t)( 0xf0 | ((v >> 18) & 0x7) );
436            utf8[offset+1] = (byte_t)( 0x80 | ((v >> 12) & 0x3f) );
437            utf8[offset+2] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) );
438            utf8[offset+3] = (byte_t)( 0x80 |  (v & 0x3f) );
439        }
440    }
441    return  result;
442}
443
444static __inline__ int
445ucs2_write( bytes_t  ucs2, int  offset, int  v )
446{
447    if (ucs2) {
448        ucs2[offset+0] = (byte_t) (v >> 8);
449        ucs2[offset+1] = (byte_t) (v);
450    }
451    return 2;
452}
453
454int
455utf8_check( cbytes_t   p, int  utf8len )
456{
457    cbytes_t  end    = p + utf8len;
458    int       result = 0;
459
460    if (p) {
461        while (p < end) {
462            int  c = *p++;
463            if (c >= 128) {
464                int  len;
465                if ((c & 0xe0) == 0xc0) {
466                    len = 1;
467                }
468                else if ((c & 0xf0) == 0xe0) {
469                    len = 2;
470                }
471                else if ((c & 0xf8) == 0xf0) {
472                    len = 3;
473                }
474                else
475                    goto Exit;  /* malformed utf-8 */
476
477                if (p+len > end) /* string too short */
478                    goto Exit;
479
480                for ( ; len > 0; len--, p++ ) {
481                    if ((p[0] & 0xc0) != 0x80)
482                        goto Exit;
483                }
484            }
485        }
486        result = 1;
487    }
488Exit:
489    return result;
490}
491
492/** UCS2 to UTF8
493 **/
494
495/* convert a UCS2 string into a UTF8 byte string, assumes 'buf' is correctly sized */
496int
497ucs2_to_utf8( cbytes_t  ucs2,
498              int       ucs2len,
499              bytes_t   buf )
500{
501    int  nn;
502    int  result = 0;
503
504    for (nn = 0; nn < ucs2len; ucs2 += 2, nn++) {
505        int  c= (ucs2[0] << 8) | ucs2[1];
506        result += utf8_write(buf, result, c);
507    }
508    return result;
509}
510
511/* count the number of UCS2 chars contained in a utf8 byte string */
512int
513utf8_to_ucs2( cbytes_t  utf8,
514              int       utf8len,
515              bytes_t   ucs2 )
516{
517    cbytes_t  p      = utf8;
518    cbytes_t  end    = p + utf8len;
519    int       result = 0;
520
521    while (p < end) {
522        int  c = utf8_next(&p, end);
523
524        if (c < 0)
525            break;
526
527        result += ucs2_write(ucs2, result, c);
528    }
529    return result/2;
530}
531
532
533
534/** GSM ALPHABET
535 **/
536
/* septet that announces a character taken from the extension table */
#define  GSM_7BITS_ESCAPE   0x1b
/* NOTE(review): not referenced in the visible part of this file */
#define  GSM_7BITS_UNKNOWN  0
539
/* Unicode code point of every septet of the GSM 7-bit default alphabet,
 * indexed by septet value. Entry 0x1b is the escape septet and has no
 * character of its own, hence the 0 placeholder.
 *
 * Entry 0x5d is LATIN CAPITAL LETTER N WITH TILDE (U+00D1), the upper
 * case partner of entry 0x7d (U+00F1).
 */
static const unsigned short   gsm7bits_to_unicode[128] = {
    /* 0x00 */
    '@',   0xa3,  '$',   0xa5,  0xe8,  0xe9,  0xf9,  0xec,
    0xf2,  0xc7,  '\n',  0xd8,  0xf8,  '\r',  0xc5,  0xe5,
    /* 0x10 */
    0x394, '_',   0x3a6, 0x393, 0x39b, 0x3a9, 0x3a0, 0x3a8,
    0x3a3, 0x398, 0x39e, 0,     0xc6,  0xe6,  0xdf,  0xc9,
    /* 0x20 */
    ' ',   '!',   '"',   '#',   0xa4,  '%',   '&',   '\'',
    '(',   ')',   '*',   '+',   ',',   '-',   '.',   '/',
    /* 0x30 */
    '0',   '1',   '2',   '3',   '4',   '5',   '6',   '7',
    '8',   '9',   ':',   ';',   '<',   '=',   '>',   '?',
    /* 0x40 */
    0xa1,  'A',   'B',   'C',   'D',   'E',   'F',   'G',
    'H',   'I',   'J',   'K',   'L',   'M',   'N',   'O',
    /* 0x50 */
    'P',   'Q',   'R',   'S',   'T',   'U',   'V',   'W',
    'X',   'Y',   'Z',   0xc4,  0xd6,  0xd1,  0xdc,  0xa7,
    /* 0x60 */
    0xbf,  'a',   'b',   'c',   'd',   'e',   'f',   'g',
    'h',   'i',   'j',   'k',   'l',   'm',   'n',   'o',
    /* 0x70 */
    'p',   'q',   'r',   's',   't',   'u',   'v',   'w',
    'x',   'y',   'z',   0xe4,  0xf6,  0xf1,  0xfc,  0xe0,
};
550
/* Unicode code point of every septet that may follow the escape septet,
 * indexed by septet value. A 0 entry means that no character is assigned
 * to that position.
 */
static const unsigned short  gsm7bits_extend_to_unicode[128] = {
    /* 0x00 */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, '\f', 0, 0, 0, 0, 0,
    /* 0x10 */
    0, 0, 0, 0, '^', 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */
    0, 0, 0, 0, 0, 0, 0, 0,
    '{', '}', 0, 0, 0, 0, 0, '\\',
    /* 0x30 */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, '[', '~', ']', 0,
    /* 0x40 */
    '|', 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60 */
    0, 0, 0, 0, 0, 0x20ac, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
};
561
562
563static int
564unichar_to_gsm7( int  unicode )
565{
566    int  nn;
567    for (nn = 0; nn < 128; nn++) {
568        if (gsm7bits_to_unicode[nn] == unicode) {
569            return nn;
570        }
571    }
572    return -1;
573}
574
575static int
576unichar_to_gsm7_extend( int  unichar )
577{
578    int  nn;
579    for (nn = 0; nn < 128; nn++) {
580        if (gsm7bits_extend_to_unicode[nn] == unichar) {
581            return nn;
582        }
583    }
584    return -1;
585}
586
587
/* Return the number of septets needed to encode a unicode character:
 * 1 when it is found by unichar_to_gsm7(), 2 when it is found by
 * unichar_to_gsm7_extend(), and 0 when neither lookup succeeds.
 */
static int
unichar_to_gsm7_count( int  unicode )
{
    if (unichar_to_gsm7(unicode) >= 0)
        return 1;

    if (unichar_to_gsm7_extend(unicode) >= 0)
        return 2;

    return 0;
}
604
605
606cbytes_t
607utf8_skip_gsm7( cbytes_t  utf8, cbytes_t  utf8end, int  gsm7len )
608{
609    cbytes_t  p   = utf8;
610    cbytes_t  end = utf8end;
611
612    while (gsm7len >0) {
613        cbytes_t  q = p;
614        int       c = utf8_next( &q, end );
615        int       len;
616
617        if (c < 0)
618            break;
619
620        len = unichar_to_gsm7_count( c );
621        if (len == 0)  /* unknown chars are replaced by spaces */
622            len = 1;
623
624        if (len > gsm7len)
625            break;
626
627        gsm7len -= len;
628        p        = q;
629    }
630    return  p;
631}
632
633
634int
635utf8_check_gsm7( cbytes_t  utf8,
636                 int       utf8len )
637{
638    cbytes_t  utf8end = utf8 + utf8len;
639
640    while (utf8 < utf8end) {
641        int  c = utf8_next( &utf8, utf8end );
642        if (unichar_to_gsm7_count(c) == 0)
643            return 0;
644    }
645    return 1;
646}
647
648
649int
650utf8_from_gsm7( cbytes_t  src,
651                int       septet_offset,
652                int       septet_count,
653                bytes_t   utf8 )
654{
655    int  shift   = (septet_offset & 7);
656    int  escaped = 0;
657    int  result  = 0;
658
659    src += (septet_offset >> 3);
660    for ( ; septet_count > 0; septet_count-- )
661    {
662        int  c = (src[0] >> shift) & 0x7f;
663        int  v;
664
665        if (shift > 1) {
666            c = ((src[1] << (8-shift)) | c) & 0x7f;
667        }
668
669        if (escaped) {
670            v = gsm7bits_extend_to_unicode[c];
671        } else if (c == GSM_7BITS_ESCAPE) {
672            escaped = 1;
673            goto NextSeptet;
674        } else {
675            v = gsm7bits_to_unicode[c];
676        }
677
678        result += utf8_write( utf8, result, v );
679
680    NextSeptet:
681        shift += 7;
682        if (shift >= 8) {
683            shift -= 8;
684            src   += 1;
685        }
686    }
687    return  result;
688}
689
690
691int
692utf8_from_gsm8( cbytes_t  src, int  count, bytes_t  utf8 )
693{
694    int  result  = 0;
695    int  escaped = 0;
696
697
698    for ( ; count > 0; count-- )
699    {
700        int  c = *src++;
701
702        if (c == 0xff)
703            break;
704
705        if (c == GSM_7BITS_ESCAPE) {
706            if (escaped) { /* two escape characters => one space */
707                c = 0x20;
708                escaped = 0;
709            } else {
710                escaped = 1;
711                continue;
712            }
713        }
714        else
715        {
716            if (c >= 0x80) {
717                c       = 0x20;
718                escaped = 0;
719            } else if (escaped) {
720                c = gsm7bits_extend_to_unicode[c];
721            } else
722                c = gsm7bits_to_unicode[c];
723        }
724
725        result += utf8_write( utf8, result, c );
726    }
727    return  result;
728}
729
730/* convert a GSM 7-bit message into a unicode character array
731 * the 'dst' array must contain at least 160 chars. the function
732 * returns the number of characters decoded
733 *
734 * assumes the 'dst' array has at least septet_count items, returns the
735 * number of unichars really written
736 */
737int
738ucs2_from_gsm7( bytes_t   ucs2,
739                cbytes_t  src,
740                int       septet_offset,
741                int       septet_count )
742{
743    const unsigned char*  p     = src + (septet_offset >> 3);
744    int                   shift = (septet_offset & 7);
745    int                   escaped = 0;
746    int                   result  = 0;
747
748    for ( ; septet_count > 0; septet_count-- )
749    {
750        unsigned  val  = (p[0] >> shift) & 0x7f;
751
752        if (shift > 1)
753            val = (val | (p[1] << (8-shift))) & 0x7f;
754
755        if (escaped) {
756            int  c = gsm7bits_to_unicode[val];
757
758            result += ucs2_write(ucs2, result, c);
759            escaped = 0;
760        }
761        else if (val == GSM_7BITS_ESCAPE) {
762            escaped = 1;
763        }
764        else {
765            val = gsm7bits_extend_to_unicode[val];
766            if (val == 0)
767                val = 0x20;
768
769            result += ucs2_write( ucs2, result, val );
770        }
771    }
772    return result/2;
773}
774
775
776/* count the number of septets required to write a utf8 string */
777static int
778utf8_to_gsm7_count( cbytes_t  utf8, int  utf8len )
779{
780    cbytes_t  utf8end = utf8 + utf8len;
781    int       result  = 0;
782
783    while ( utf8 < utf8end ) {
784        int  len;
785        int  c = utf8_next( &utf8, utf8end );
786
787        if (c < 0)
788            break;
789
790        len = unichar_to_gsm7_count(c);
791        if (len == 0)    /* replace non-representables with space */
792            len = 1;
793
794        result += len;
795    }
796    return result;
797}
798
799typedef struct {
800    bytes_t   dst;
801    unsigned  pad;
802    int       bits;
803    int       offset;
804} BWriterRec, *BWriter;
805
806static void
807bwriter_init( BWriter  writer, bytes_t  dst, int  start )
808{
809    int  shift = start & 7;
810
811    writer->dst    = dst + (start >> 3);
812    writer->pad    = 0;
813    writer->bits   = shift;
814    writer->offset = start;
815
816    if (shift > 0) {
817        writer->pad  = writer->dst[0] & ~(0xFF << shift);
818    }
819}
820
821static void
822bwriter_add7( BWriter  writer, unsigned  value )
823{
824    writer->pad  |= (unsigned)(value << writer->bits);
825    writer->bits += 7;
826    if (writer->bits >= 8) {
827        writer->dst[0] = (byte_t)writer->pad;
828        writer->bits  -= 8;
829        writer->pad  >>= 8;
830        writer->dst   += 1;
831    }
832    writer->offset += 7;
833}
834
835static int
836bwriter_done( BWriter  writer )
837{
838    if (writer->bits > 0) {
839        writer->dst[0] = (byte_t)writer->pad;
840        writer->pad    = 0;
841        writer->bits   = 0;
842        writer->dst   += 1;
843    }
844    return writer->offset;
845}
846
847/* convert a utf8 string to a gsm7 byte string - return the number of septets written */
848int
849utf8_to_gsm7( cbytes_t  utf8, int  utf8len, bytes_t  dst, int offset )
850{
851    const unsigned char*  utf8end = utf8 + utf8len;
852    BWriterRec            writer[1];
853
854    if (dst == NULL)
855        return utf8_to_gsm7_count(utf8, utf8len);
856
857    bwriter_init( writer, dst, offset );
858    while ( utf8 < utf8end ) {
859        int  c = utf8_next( &utf8, utf8end );
860        int  nn;
861
862        if (c < 0)
863            break;
864
865        nn = unichar_to_gsm7(c);
866        if (nn >= 0) {
867            bwriter_add7( writer, nn );
868            continue;
869        }
870
871        nn = unichar_to_gsm7_extend(c);
872        if (nn >= 0) {
873            bwriter_add7( writer, GSM_7BITS_ESCAPE );
874            bwriter_add7( writer, nn );
875            continue;
876        }
877
878        /* unknown => replaced by space */
879        bwriter_add7( writer, 0x20 );
880    }
881    return  bwriter_done( writer );
882}
883
884
885int
886utf8_to_gsm8( cbytes_t  utf8, int  utf8len, bytes_t  dst )
887{
888    const unsigned char*  utf8end = utf8 + utf8len;
889    int                   result  = 0;
890
891    while ( utf8 < utf8end ) {
892        int  c = utf8_next( &utf8, utf8end );
893        int  nn;
894
895        if (c < 0)
896            break;
897
898        nn = unichar_to_gsm7(c);
899        if (nn >= 0) {
900            if (dst)
901                dst[result] = (byte_t)nn;
902            result += 1;
903            continue;
904        }
905
906        nn = unichar_to_gsm7_extend(c);
907        if (nn >= 0) {
908            if (dst) {
909                dst[result+0] = (byte_t) GSM_7BITS_ESCAPE;
910                dst[result+1] = (byte_t) nn;
911            }
912            result += 2;
913            continue;
914        }
915
916        /* unknown => space */
917        if (dst)
918            dst[result] = 0x20;
919        result += 1;
920    }
921    return  result;
922}
923
924
925int
926ucs2_to_gsm7( cbytes_t  ucs2, int  ucs2len, bytes_t  dst, int offset )
927{
928    const unsigned char*  ucs2end = ucs2 + ucs2len*2;
929    BWriterRec            writer[1];
930
931    bwriter_init( writer, dst, offset );
932    while ( ucs2 < ucs2end ) {
933        int  c = *ucs2++;
934        int  nn;
935
936        for (nn = 0; nn < 128; nn++) {
937            if ( gsm7bits_to_unicode[nn] == c ) {
938                bwriter_add7( writer, nn );
939                goto NextUnicode;
940            }
941        }
942        for (nn = 0; nn < 128; nn++) {
943            if ( gsm7bits_extend_to_unicode[nn] == c ) {
944                bwriter_add7( writer, GSM_7BITS_ESCAPE );
945                bwriter_add7( writer, nn );
946                goto NextUnicode;
947            }
948        }
949
950        /* unknown */
951        bwriter_add7( writer, 0x20 );
952
953    NextUnicode:
954        ;
955    }
956    return  bwriter_done( writer );
957}
958
959
960int
961ucs2_to_gsm8( cbytes_t  ucs2, int  ucs2len, bytes_t  dst )
962{
963    const unsigned char*  ucs2end = ucs2 + ucs2len*2;
964    bytes_t               dst0    = dst;
965
966    while ( ucs2 < ucs2end ) {
967        int  c = *ucs2++;
968        int  nn;
969
970        for (nn = 0; nn < 128; nn++) {
971            if ( gsm7bits_to_unicode[nn] == c ) {
972                *dst++ = (byte_t)nn;
973                goto NextUnicode;
974            }
975        }
976        for (nn = 0; nn < 128; nn++) {
977            if ( gsm7bits_extend_to_unicode[nn] == c ) {
978                dst[0] = (byte_t) GSM_7BITS_ESCAPE;
979                dst[1] = (byte_t) nn;
980                dst   += 2;
981                goto NextUnicode;
982            }
983        }
984
985        /* unknown */
986        *dst++ = 0x20;
987
988    NextUnicode:
989        ;
990    }
991    return (dst - dst0);
992}
993
994int
995gsm_bcdnum_to_ascii( cbytes_t  bcd, int  count, bytes_t  dst )
996{
997    int  result = 0;
998    int  shift  = 0;
999
1000    while (count > 0) {
1001        int  c = (bcd[0] >> shift) & 0xf;
1002
1003        if (c == 15 && count == 1)  /* ignore trailing 0xf */
1004            break;
1005
1006        if (c >= 14)
1007            c = 0;
1008
1009        if (dst) dst[result] = "0123456789*#,N"[c];
1010        result += 1;
1011
1012        shift += 4;
1013        if (shift == 8) {
1014            shift = 0;
1015            bcd += 1;
1016        }
1017    }
1018    return  result;
1019}
1020
1021
1022int
1023gsm_bcdnum_from_ascii( cbytes_t  ascii, int  asciilen, bytes_t  dst )
1024{
1025    cbytes_t  end = ascii + asciilen;
1026    int  result   = 0;
1027    int  phase = 0x01;
1028
1029    while (ascii < end) {
1030        int  c = *ascii++;
1031
1032        if (c == '*')
1033            c = 10;
1034        else if (c == '#')
1035            c = 11;
1036        else if (c == ',')
1037            c = 12;
1038        else if (c == 'N')
1039            c = 13;
1040        else {
1041            c -= '0';
1042            if ((unsigned)c >= 10U)
1043                return -1;
1044        }
1045        phase   = (phase << 4) | c;
1046        result += 1;
1047        if (phase & 0x100) {
1048            if (dst) dst[result/2] = (byte_t) phase;
1049            phase   = 0x01;
1050        }
1051    }
1052
1053    if (result & 1) {
1054        if (dst) dst[result/2] = (byte_t)(phase | 0xf0);
1055    }
1056    return result;
1057}
1058
1059/** ADN: Abbreviated Dialing Number
1060 **/
1061
/* size in bytes of the fixed footer that ends an ADN record */
#define  ADN_FOOTER_SIZE     14

/* byte offsets of the footer fields, relative to the start of the footer */
#define  ADN_OFFSET_NUMBER_LENGTH   0
#define  ADN_OFFSET_TON_NPI         1
#define  ADN_OFFSET_NUMBER_START    2
#define  ADN_OFFSET_NUMBER_END      11
#define  ADN_OFFSET_CAPABILITY_ID   12
#define  ADN_OFFSET_EXTENSION_ID    13
1069
1070/* see 10.5.1 of 3GPP 51.011 */
1071static int
1072sim_adn_alpha_to_utf8( cbytes_t  alpha, cbytes_t  end, bytes_t  dst )
1073{
1074    int  result = 0;
1075
1076    /* ignore trailing 0xff */
1077    while (alpha < end && end[-1] == 0xff)
1078        end--;
1079
1080    if (alpha >= end)
1081        return 0;
1082
1083    if (alpha[0] == 0x80) { /* UCS/2 source encoding */
1084        alpha += 1;
1085        result = ucs2_to_utf8( alpha, (end-alpha)/2, dst );
1086    }
1087    else
1088    {
1089        int  is_ucs2 = 0;
1090        int  len = 0, base = 0;
1091
1092        if (alpha+3 <= end && alpha[0] == 0x81) {
1093            is_ucs2 = 1;
1094            len     = alpha[1];
1095            base    = alpha[2] << 7;
1096            alpha  += 3;
1097            if (len > end-alpha)
1098                len = end-alpha;
1099        } else if (alpha+4 <= end && alpha[0] == 0x82) {
1100            is_ucs2 = 1;
1101            len     = alpha[1];
1102            base    = (alpha[2] << 8) | alpha[3];
1103            alpha  += 4;
1104            if (len > end-alpha)
1105                len = end-alpha;
1106        }
1107
1108        if (is_ucs2) {
1109            end = alpha + len;
1110            while (alpha < end) {
1111                int  c = alpha[0];
1112                if (c >= 0x80) {
1113                    result += utf8_write(dst, result, base + (c & 0x7f));
1114                    alpha  += 1;
1115                } else {
1116                    /* GSM character set */
1117                    int   count;
1118                    for (count = 0; alpha+count < end && alpha[count] < 128; count++)
1119                        ;
1120                    result += utf8_from_gsm8(alpha, count, (dst ? dst+result : NULL));
1121                    alpha  += count;
1122                }
1123            }
1124        }
1125        else {
1126            result = utf8_from_gsm8(alpha, end-alpha, dst);
1127        }
1128    }
1129    return result;
1130}
1131
1132static int
1133sim_adn_alpha_from_utf8( cbytes_t  utf8, int  utf8len, bytes_t  dst )
1134{
1135    int   result = 0;
1136
1137    if (utf8_check_gsm7(utf8, utf8len)) {
1138        /* GSM 7-bit compatible, encode directly as 8-bit string */
1139        result = utf8_to_gsm8(utf8, utf8len, dst);
1140    } else {
1141        /* otherwise, simply try UCS-2 encoding, nothing more serious at the moment */
1142        if (dst) {
1143            dst[0] = 0x80;
1144        }
1145        result = 1 + utf8_to_ucs2(utf8, utf8len, dst ? (dst+1) : NULL)*2;
1146    }
1147    return  result;
1148}
1149
1150int
1151sim_adn_record_from_bytes( SimAdnRecord  rec, cbytes_t  data, int  len )
1152{
1153    cbytes_t  end    = data + len;
1154    cbytes_t  footer = end - ADN_FOOTER_SIZE;
1155    int       num_len;
1156
1157    rec->adn.alpha[0]  = 0;
1158    rec->adn.number[0] = 0;
1159    rec->ext_record    = 0xff;
1160
1161    if (len < ADN_FOOTER_SIZE)
1162        return -1;
1163
1164    /* alpha is optional */
1165    if (len > ADN_FOOTER_SIZE) {
1166        cbytes_t  dataend = data + len - ADN_FOOTER_SIZE;
1167        int       count   = sim_adn_alpha_to_utf8(data, dataend, NULL);
1168
1169        if (count > sizeof(rec->adn.alpha)-1)  /* too long */
1170            return -1;
1171
1172        sim_adn_alpha_to_utf8(data, dataend, rec->adn.alpha);
1173        rec->adn.alpha[count] = 0;
1174    }
1175
1176    num_len = footer[ADN_OFFSET_NUMBER_LENGTH];
1177    if (num_len > 11)
1178        return -1;
1179
1180    /* decode TON and number to ASCII, NOTE: this is lossy !! */
1181    {
1182        int      ton    = footer[ADN_OFFSET_TON_NPI];
1183        bytes_t  number = (bytes_t) rec->adn.number;
1184        int      len    = sizeof(rec->adn.number)-1;
1185        int      count;
1186
1187        if (ton != 0x81 && ton != 0x91)
1188            return -1;
1189
1190        if (ton == 0x91) {
1191            *number++ = '+';
1192            len      -= 1;
1193        }
1194
1195        count = gsm_bcdnum_to_ascii( footer + ADN_OFFSET_NUMBER_START,
1196                                     num_len*2, number );
1197        number[count] = 0;
1198    }
1199    return 0;
1200}
1201
1202int
1203sim_adn_record_to_bytes( SimAdnRecord  rec, bytes_t   data, int  datalen )
1204{
1205    bytes_t   end    = data + datalen;
1206    bytes_t   footer = end - ADN_FOOTER_SIZE;
1207    int       ton    = 0x81;
1208    cbytes_t  number = (cbytes_t) rec->adn.number;
1209
1210    if (number[0] == '+') {
1211        ton     = 0x91;
1212        number += 1;
1213    }
1214    footer[0] = (strlen((const char*)number)+1)/2 + 1;
1215    /* XXXX: TODO */
1216    return 0;
1217}
1218