gsm.c revision c2db2b6accc7888df514261a7240e7759df95a4c
1/* Copyright (C) 2007-2008 The Android Open Source Project
2**
3** This software is licensed under the terms of the GNU General Public
4** License version 2, as published by the Free Software Foundation, and
5** may be copied, distributed, and modified under those terms.
6**
7** This program is distributed in the hope that it will be useful,
8** but WITHOUT ANY WARRANTY; without even the implied warranty of
9** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10** GNU General Public License for more details.
11*/
12#include "gsm.h"
13#include <stdlib.h>
14#include <string.h>
15
16/** UTILITIES
17 **/
18byte_t
19gsm_int_to_bcdi( int  value )
20{
21    return (byte_t)((value / 10) | ((value % 10) << 4));
22}
23
24int
25gsm_int_from_bcdi( byte_t  val )
26{
27    int  ret = 0;
28
29    if ((val & 0xf0) <= 0x90)
30        ret = (val >> 4);
31
32    if ((val & 0x0f) <= 0x90)
33        ret |= (val % 0xf)*10;
34
35    return ret;
36}
37
#if 0
/* NOTE: this function is compiled out by the surrounding '#if 0'.
 *
 * Convert 'bcdlen' BCD nibbles starting at 'bcd' into ASCII characters.
 * Nibbles are read low nibble first, then high nibble, of each byte.
 * Values 0..13 map to "0123456789*#,N"; other values are skipped.
 * A 0xf nibble in the very last position is treated as padding.
 *
 * 'dst' may be NULL, in which case nothing is written.
 * Returns the number of characters produced (or that would be produced).
 */
static int
gsm_bcdi_to_ascii( cbytes_t  bcd, int  bcdlen, bytes_t  dst )
{
    /* exactly 14 characters, no terminating NUL is stored */
    static byte_t  bcdichars[14] = "0123456789*#,N";

    int  result = 0;
    int  shift  = 0;   /* 0 => low nibble, 4 => high nibble */

    while (bcdlen > 0) {
        int  c = (bcd[0] >> shift) & 0xf;

        /* trailing filler nibble */
        if (c == 0xf && bcdlen == 1)
            break;

        if (c < 14) {
            if (dst) dst[result] = bcdichars[c];
            result += 1;
        }
        bcdlen --;
        shift += 4;
        if (shift == 8) {
            /* both nibbles consumed, move to the next byte */
            bcd++;
            shift = 0;
        }
    }
    return result;
}
#endif
67
#if 0
/* NOTE: this function is compiled out by the surrounding '#if 0'.
 *
 * Pack an ASCII digit string into BCD bytes. Conversion stops at the
 * first character that is not a decimal digit or one of '*', '#', ',', 'N'.
 *
 * 'dst' may be NULL, in which case nothing is written.
 * Returns the number of bytes produced (or that would be produced).
 *
 * NOTE(review): '*', '#', ',' and 'N' are mapped to 11..14 here, while the
 * "0123456789*#,N" table used by the decoders places them at 10..13.
 * NOTE(review): the first digit of each pair ends up in the high nibble,
 * whereas the decoders read the low nibble first - confirm before enabling.
 */
static int
gsm_bcdi_from_ascii( cbytes_t  ascii, int  asciilen, bytes_t  dst )
{
    cbytes_t  end    = ascii + asciilen;
    int       result = 0;
    int       phase  = 0x01;   /* sentinel bit: reaches 0x100 after two nibbles */

    while (ascii < end) {
        int  c = *ascii++;

        if (c == '*')
            c = 11;
        else if (c == '#')
            c = 12;
        else if (c == ',')
            c = 13;
        else if (c == 'N')
            c = 14;
        else {
            c -= '0';
            /* anything outside '0'..'9' ends the conversion */
            if ((unsigned)c >= 10)
                break;
        }
        phase = (phase << 4) | c;
        if (phase & 0x100) {
            /* two nibbles collected: emit one byte and restart */
            if (dst) dst[result] = (byte_t) phase;
            result += 1;
            phase   = 0x01;
        }
    }
    if (phase != 0x01) {
        /* odd number of digits: pad the high nibble with 0xf */
        if (dst) dst[result] = (byte_t)( phase | 0xf0 );
        result += 1;
    }
    return  result;
}
#endif
106
/* Return the value (0..15) of a single hexadecimal digit character,
 * accepting both lower and upper case, or -1 if 'c' is not a hex digit.
 */
int
gsm_hexchar_to_int( char  c )
{
    if (c >= '0' && c <= '9')
        return c - '0';

    if (c >= 'a' && c <= 'f')
        return (c - 'a') + 10;

    if (c >= 'A' && c <= 'F')
        return (c - 'A') + 10;

    return -1;
}
118
/* Lenient variant of gsm_hexchar_to_int(): characters that are not
 * hexadecimal digits are converted to 0 instead of reporting an error.
 */
int
gsm_hexchar_to_int0( char  c )
{
    int  digit = gsm_hexchar_to_int(c);

    if (digit < 0)
        digit = 0;

    return digit;
}
126
/* Parse the two hexadecimal characters at hex[0] and hex[1].
 * Returns the resulting value (0..255), or -1 if either character
 * is not a hexadecimal digit.
 */
int
gsm_hex2_to_byte( const char*  hex )
{
    int  high = gsm_hexchar_to_int(hex[0]);
    int  low  = gsm_hexchar_to_int(hex[1]);

    if (high >= 0 && low >= 0)
        return (high << 4) | low;

    return -1;
}
138
/* Parse the four hexadecimal characters at hex[0..3], most significant
 * byte first. Returns the resulting value (0..65535), or -1 if any of
 * the characters is not a hexadecimal digit.
 */
int
gsm_hex4_to_short( const char*  hex )
{
    int  high = gsm_hex2_to_byte(hex);
    int  low  = gsm_hex2_to_byte(hex+2);

    if (high >= 0 && low >= 0)
        return (high << 8) | low;

    return -1;
}
150
151int
152gsm_hex2_to_byte0( const char*  hex )
153{
154    int  hi = gsm_hexchar_to_int0(hex[0]);
155    int  lo = gsm_hexchar_to_int0(hex[1]);
156
157    return (byte_t)( (hi << 4) | lo );
158}
159
/* Write the low 8 bits of 'val' as two lower-case hexadecimal characters
 * into hex[0] and hex[1]. No terminating NUL is written.
 */
void
gsm_hex_from_byte( char*  hex, int val )
{
    const char*  table = "0123456789abcdef";
    int          high  = (val >> 4) & 0xf;
    int          low   = val & 0xf;

    hex[0] = table[high];
    hex[1] = table[low];
}
168
/* Write the low 16 bits of 'val' as four lower-case hexadecimal
 * characters into hex[0..3], most significant byte first.
 * No terminating NUL is written.
 */
void
gsm_hex_from_short( char*  hex, int  val )
{
    int  high = val >> 8;

    gsm_hex_from_byte( hex, high );
    gsm_hex_from_byte( hex + 2, val );
}
175
176
177
178/** HEX
179 **/
180void
181gsm_hex_to_bytes0( cbytes_t  hex, int  hexlen, bytes_t  dst )
182{
183    int  nn;
184
185    for (nn = 0; nn < hexlen/2; nn++ ) {
186        dst[nn] = (byte_t) gsm_hex2_to_byte0( (const char*)hex+2*nn );
187    }
188    if (hexlen & 1) {
189        dst[nn] = gsm_hexchar_to_int0( hex[2*nn] ) << 4;
190    }
191}
192
193int
194gsm_hex_to_bytes( cbytes_t  hex, int  hexlen, bytes_t  dst )
195{
196    int  nn;
197
198    if (hexlen & 1)  /* must be even */
199        return -1;
200
201    for (nn = 0; nn < hexlen/2; nn++ ) {
202        int  c = gsm_hex2_to_byte( (const char*)hex+2*nn );
203        if (c < 0) return -1;
204        dst[nn] = (byte_t) c;
205    }
206    return hexlen/2;
207}
208
209void
210gsm_hex_from_bytes( char*  hex, cbytes_t  src, int  srclen )
211{
212    int  nn;
213
214    for (nn = 0; nn < srclen; nn++) {
215        gsm_hex_from_byte( hex + 2*nn, src[nn] );
216    }
217}
218
219/** ROPES
220 **/
221
222void
223gsm_rope_init( GsmRope  rope )
224{
225    rope->data  = NULL;
226    rope->pos   = 0;
227    rope->max   = 0;
228    rope->error = 0;
229}
230
231void
232gsm_rope_init_alloc( GsmRope  rope, int  count )
233{
234    rope->data  = rope->data0;
235    rope->pos   = 0;
236    rope->max   = sizeof(rope->data0);
237    rope->error = 0;
238
239    if (count > 0) {
240        rope->data = calloc( count, 1 );
241        rope->max  = count;
242
243        if (rope->data == NULL) {
244            rope->error = 1;
245            rope->max   = 0;
246        }
247    }
248}
249
250int
251gsm_rope_done( GsmRope  rope )
252{
253    int  result = rope->error;
254
255    if (rope->data && rope->data != rope->data0)
256        free(rope->data);
257
258    rope->data  = NULL;
259    rope->pos   = 0;
260    rope->max   = 0;
261    rope->error = 0;
262
263    return result;
264}
265
266
267bytes_t
268gsm_rope_done_acquire( GsmRope  rope, int  *psize )
269{
270    bytes_t  result = rope->data;
271
272    *psize = rope->pos;
273    if (result == rope->data0) {
274        result = malloc(  rope->pos );
275        if (result != NULL)
276            memcpy( result, rope->data, rope->pos );
277    }
278    return result;
279}
280
281
282int
283gsm_rope_ensure( GsmRope  rope, int  new_count )
284{
285    if (rope->data != NULL) {
286        int       old_max  = rope->max;
287        bytes_t   old_data = rope->data == rope->data0 ? NULL : rope->data;
288        int       new_max  = old_max;
289        bytes_t   new_data;
290
291        while (new_max < new_count) {
292            new_max += (new_max >> 1) + 4;
293        }
294        new_data = realloc( old_data, new_max );
295        if (new_data == NULL) {
296            rope->error = 1;
297            return -1;
298        }
299        rope->data = new_data;
300        rope->max  = new_max;
301    } else {
302        rope->max = new_count;
303    }
304    return 0;
305}
306
307static int
308gsm_rope_can_grow( GsmRope  rope, int  count )
309{
310    if (!rope->data || rope->error)
311        return 0;
312
313    if (rope->pos + count > rope->max)
314    {
315        if (rope->data == NULL)
316            rope->max = rope->pos + count;
317
318        else if (rope->error ||
319                 gsm_rope_ensure( rope, rope->pos + count ) < 0)
320            return 0;
321    }
322    return 1;
323}
324
325void
326gsm_rope_add_c( GsmRope  rope,  char  c )
327{
328    if (gsm_rope_can_grow(rope, 1)) {
329        rope->data[ rope->pos ] = (byte_t) c;
330    }
331    rope->pos += 1;
332}
333
334void
335gsm_rope_add( GsmRope  rope, const void*  buf, int  buflen )
336{
337    if (gsm_rope_can_grow(rope, buflen)) {
338        memcpy( rope->data + rope->pos, (const char*)buf, buflen );
339    }
340    rope->pos += buflen;
341}
342
343void*
344gsm_rope_reserve( GsmRope  rope, int  count )
345{
346    void*  result = NULL;
347
348    if (gsm_rope_can_grow(rope, count))
349    {
350        if (rope->data != NULL)
351            result = rope->data + rope->pos;
352    }
353    rope->pos += count;
354
355    return result;
356}
357
358/* skip a given number of Unicode characters in a utf-8 byte string */
359cbytes_t
360utf8_skip( cbytes_t   utf8,
361           cbytes_t   utf8end,
362           int        count)
363{
364    cbytes_t  p   = utf8;
365    cbytes_t  end = utf8end;
366
367    for ( ; count > 0; count-- ) {
368        int  c;
369
370        if (p >= end)
371            break;
372
373        c = *p++;
374        if (c > 128) {
375            while (p < end && (p[0] & 0xc0) == 0x80)
376                p++;
377        }
378    }
379    return  p;
380}
381
382
383static __inline__ int
384utf8_next( cbytes_t  *pp, cbytes_t  end )
385{
386    cbytes_t  p      = *pp;
387    int       result = -1;
388
389    if (p < end) {
390        int  c= *p++;
391        if (c >= 128) {
392            if ((c & 0xe0) == 0xc0)
393                c &= 0x1f;
394            else if ((c & 0xf0) == 0xe0)
395                c &= 0x0f;
396            else
397                c &= 0x07;
398
399            while (p < end && (p[0] & 0xc0) == 0x80) {
400                c = (c << 6) | (p[0] & 0x3f);
401                p ++;
402            }
403        }
404        result = c;
405        *pp    = p;
406    }
407    return result;
408}
409
410
411__inline__ int
412utf8_write( bytes_t  utf8, int  offset, int  v )
413{
414    int  result;
415
416    if (v < 128) {
417        result = 1;
418        if (utf8)
419            utf8[offset] = (byte_t) v;
420    } else if (v < 0x800) {
421        result = 2;
422        if (utf8) {
423            utf8[offset+0] = (byte_t)( 0xc0 | (v >> 6) );
424            utf8[offset+1] = (byte_t)( 0x80 | (v & 0x3f) );
425        }
426    } else if (v < 0x10000) {
427        result = 3;
428        if (utf8) {
429            utf8[offset+0] = (byte_t)( 0xe0 |  (v >> 12) );
430            utf8[offset+1] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) );
431            utf8[offset+2] = (byte_t)( 0x80 |  (v & 0x3f) );
432        }
433    } else {
434        result = 4;
435        if (utf8) {
436            utf8[offset+0] = (byte_t)( 0xf0 | ((v >> 18) & 0x7) );
437            utf8[offset+1] = (byte_t)( 0x80 | ((v >> 12) & 0x3f) );
438            utf8[offset+2] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) );
439            utf8[offset+3] = (byte_t)( 0x80 |  (v & 0x3f) );
440        }
441    }
442    return  result;
443}
444
445static __inline__ int
446ucs2_write( bytes_t  ucs2, int  offset, int  v )
447{
448    if (ucs2) {
449        ucs2[offset+0] = (byte_t) (v >> 8);
450        ucs2[offset+1] = (byte_t) (v);
451    }
452    return 2;
453}
454
455int
456utf8_check( cbytes_t   p, int  utf8len )
457{
458    cbytes_t  end    = p + utf8len;
459    int       result = 0;
460
461    if (p) {
462        while (p < end) {
463            int  c = *p++;
464            if (c >= 128) {
465                int  len;
466                if ((c & 0xe0) == 0xc0) {
467                    len = 1;
468                }
469                else if ((c & 0xf0) == 0xe0) {
470                    len = 2;
471                }
472                else if ((c & 0xf8) == 0xf0) {
473                    len = 3;
474                }
475                else
476                    goto Exit;  /* malformed utf-8 */
477
478                if (p+len > end) /* string too short */
479                    goto Exit;
480
481                for ( ; len > 0; len--, p++ ) {
482                    if ((p[0] & 0xc0) != 0x80)
483                        goto Exit;
484                }
485            }
486        }
487        result = 1;
488    }
489Exit:
490    return result;
491}
492
493/** UCS2 to UTF8
494 **/
495
496/* convert a UCS2 string into a UTF8 byte string, assumes 'buf' is correctly sized */
497int
498ucs2_to_utf8( cbytes_t  ucs2,
499              int       ucs2len,
500              bytes_t   buf )
501{
502    int  nn;
503    int  result = 0;
504
505    for (nn = 0; nn < ucs2len; ucs2 += 2, nn++) {
506        int  c= (ucs2[0] << 8) | ucs2[1];
507        result += utf8_write(buf, result, c);
508    }
509    return result;
510}
511
512/* count the number of UCS2 chars contained in a utf8 byte string */
513int
514utf8_to_ucs2( cbytes_t  utf8,
515              int       utf8len,
516              bytes_t   ucs2 )
517{
518    cbytes_t  p      = utf8;
519    cbytes_t  end    = p + utf8len;
520    int       result = 0;
521
522    while (p < end) {
523        int  c = utf8_next(&p, end);
524
525        if (c < 0)
526            break;
527
528        result += ucs2_write(ucs2, result, c);
529    }
530    return result/2;
531}
532
533
534
535/** GSM ALPHABET
536 **/
537
/* septet that announces a character from the extension table */
#define  GSM_7BITS_ESCAPE   0x1b
#define  GSM_7BITS_UNKNOWN  0

/* GSM 7-bit default alphabet: maps each septet value (the array index)
 * to its Unicode code point. Entry 0x1b is the escape code and has no
 * character of its own, hence the 0.
 */
static const unsigned short   gsm7bits_to_unicode[128] = {
  /* 0x00 */
  '@', 0xa3,  '$', 0xa5, 0xe8, 0xe9, 0xf9, 0xec, 0xf2, 0xc7, '\n', 0xd8, 0xf8, '\r', 0xc5, 0xe5,
  /* 0x10 */
0x394,  '_',0x3a6,0x393,0x39b,0x3a9,0x3a0,0x3a8,0x3a3,0x398,0x39e,    0, 0xc6, 0xe6, 0xdf, 0xc9,
  /* 0x20 */
  ' ',  '!',  '"',  '#', 0xa4,  '%',  '&', '\'',  '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
  /* 0x30 */
  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',  '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
  /* 0x40 */
 0xa1,  'A',  'B',  'C',  'D',  'E',  'F',  'G',  'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
  /* 0x50 - entry 0x5d is N with tilde (U+00D1), the upper-case of 0x7d */
  'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',  'X',  'Y',  'Z', 0xc4, 0xd6, 0xd1, 0xdc, 0xa7,
  /* 0x60 */
 0xbf,  'a',  'b',  'c',  'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  /* 0x70 */
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',  'x',  'y',  'z', 0xe4, 0xf6, 0xf1, 0xfc, 0xe0,
};
551
/* GSM 7-bit extension table: maps the septet that follows an escape
 * code (the array index) to its Unicode code point. A 0 entry means
 * that no character is defined for that code in this table.
 * Each row below covers 16 consecutive codes, starting at 0x00.
 */
static const unsigned short  gsm7bits_extend_to_unicode[128] = {
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,'\f',   0,   0,   0,   0,   0,
    0,   0,   0,   0, '^',   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0, '{', '}',   0,   0,   0,   0,   0,'\\',
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, '[', '~', ']',   0,
  '|',   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,0x20ac, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
};
562
563
564static int
565unichar_to_gsm7( int  unicode )
566{
567    int  nn;
568    for (nn = 0; nn < 128; nn++) {
569        if (gsm7bits_to_unicode[nn] == unicode) {
570            return nn;
571        }
572    }
573    return -1;
574}
575
576static int
577unichar_to_gsm7_extend( int  unichar )
578{
579    int  nn;
580    for (nn = 0; nn < 128; nn++) {
581        if (gsm7bits_extend_to_unicode[nn] == unichar) {
582            return nn;
583        }
584    }
585    return -1;
586}
587
588
/* Return the number of septets needed to encode a Unicode value:
 * 1 when it is in the default alphabet, 2 (escape + code) when it is
 * only in the extension table, and 0 when it cannot be represented.
 */
static int
unichar_to_gsm7_count( int  unicode )
{
    if (unichar_to_gsm7(unicode) >= 0)
        return 1;

    if (unichar_to_gsm7_extend(unicode) >= 0)
        return 2;

    return 0;
}
605
606
607cbytes_t
608utf8_skip_gsm7( cbytes_t  utf8, cbytes_t  utf8end, int  gsm7len )
609{
610    cbytes_t  p   = utf8;
611    cbytes_t  end = utf8end;
612
613    while (gsm7len >0) {
614        cbytes_t  q = p;
615        int       c = utf8_next( &q, end );
616        int       len;
617
618        if (c < 0)
619            break;
620
621        len = unichar_to_gsm7_count( c );
622        if (len == 0)  /* unknown chars are replaced by spaces */
623            len = 1;
624
625        if (len > gsm7len)
626            break;
627
628        gsm7len -= len;
629        p        = q;
630    }
631    return  p;
632}
633
634
635int
636utf8_check_gsm7( cbytes_t  utf8,
637                 int       utf8len )
638{
639    cbytes_t  utf8end = utf8 + utf8len;
640
641    while (utf8 < utf8end) {
642        int  c = utf8_next( &utf8, utf8end );
643        if (unichar_to_gsm7_count(c) == 0)
644            return 0;
645    }
646    return 1;
647}
648
649
650int
651utf8_from_gsm7( cbytes_t  src,
652                int       septet_offset,
653                int       septet_count,
654                bytes_t   utf8 )
655{
656    int  shift   = (septet_offset & 7);
657    int  escaped = 0;
658    int  result  = 0;
659
660    src += (septet_offset >> 3);
661    for ( ; septet_count > 0; septet_count-- )
662    {
663        int  c = (src[0] >> shift) & 0x7f;
664        int  v;
665
666        if (shift > 1) {
667            c = ((src[1] << (8-shift)) | c) & 0x7f;
668        }
669
670        if (escaped) {
671            v = gsm7bits_extend_to_unicode[c];
672        } else if (c == GSM_7BITS_ESCAPE) {
673            escaped = 1;
674            goto NextSeptet;
675        } else {
676            v = gsm7bits_to_unicode[c];
677        }
678
679        result += utf8_write( utf8, result, v );
680
681    NextSeptet:
682        shift += 7;
683        if (shift >= 8) {
684            shift -= 8;
685            src   += 1;
686        }
687    }
688    return  result;
689}
690
691
692int
693utf8_from_gsm8( cbytes_t  src, int  count, bytes_t  utf8 )
694{
695    int  result  = 0;
696    int  escaped = 0;
697
698
699    for ( ; count > 0; count-- )
700    {
701        int  c = *src++;
702
703        if (c == 0xff)
704            break;
705
706        if (c == GSM_7BITS_ESCAPE) {
707            if (escaped) { /* two escape characters => one space */
708                c = 0x20;
709                escaped = 0;
710            } else {
711                escaped = 1;
712                continue;
713            }
714        }
715        else
716        {
717            if (c >= 0x80) {
718                c       = 0x20;
719                escaped = 0;
720            } else if (escaped) {
721                c = gsm7bits_extend_to_unicode[c];
722            } else
723                c = gsm7bits_to_unicode[c];
724        }
725
726        result += utf8_write( utf8, result, c );
727    }
728    return  result;
729}
730
731/* convert a GSM 7-bit message into a unicode character array
732 * the 'dst' array must contain at least 160 chars. the function
733 * returns the number of characters decoded
734 *
735 * assumes the 'dst' array has at least septet_count items, returns the
736 * number of unichars really written
737 */
738int
739ucs2_from_gsm7( bytes_t   ucs2,
740                cbytes_t  src,
741                int       septet_offset,
742                int       septet_count )
743{
744    const unsigned char*  p     = src + (septet_offset >> 3);
745    int                   shift = (septet_offset & 7);
746    int                   escaped = 0;
747    int                   result  = 0;
748
749    for ( ; septet_count > 0; septet_count-- )
750    {
751        unsigned  val  = (p[0] >> shift) & 0x7f;
752
753        if (shift > 1)
754            val = (val | (p[1] << (8-shift))) & 0x7f;
755
756        if (escaped) {
757            int  c = gsm7bits_to_unicode[val];
758
759            result += ucs2_write(ucs2, result, c);
760            escaped = 0;
761        }
762        else if (val == GSM_7BITS_ESCAPE) {
763            escaped = 1;
764        }
765        else {
766            val = gsm7bits_extend_to_unicode[val];
767            if (val == 0)
768                val = 0x20;
769
770            result += ucs2_write( ucs2, result, val );
771        }
772    }
773    return result/2;
774}
775
776
777/* count the number of septets required to write a utf8 string */
778static int
779utf8_to_gsm7_count( cbytes_t  utf8, int  utf8len )
780{
781    cbytes_t  utf8end = utf8 + utf8len;
782    int       result  = 0;
783
784    while ( utf8 < utf8end ) {
785        int  len;
786        int  c = utf8_next( &utf8, utf8end );
787
788        if (c < 0)
789            break;
790
791        len = unichar_to_gsm7_count(c);
792        if (len == 0)    /* replace non-representables with space */
793            len = 1;
794
795        result += len;
796    }
797    return result;
798}
799
/* State of the bit writer used to pack 7-bit values into bytes. */
typedef struct {
    bytes_t   dst;     /* next byte to be written */
    unsigned  pad;     /* pending bits not yet flushed to 'dst' */
    int       bits;    /* number of pending bits held in 'pad' */
    int       offset;  /* running bit position: start value + 7 per value added */
} BWriterRec, *BWriter;
806
807static void
808bwriter_init( BWriter  writer, bytes_t  dst, int  start )
809{
810    int  shift = start & 7;
811
812    writer->dst    = dst + (start >> 3);
813    writer->pad    = 0;
814    writer->bits   = shift;
815    writer->offset = start;
816
817    if (shift > 0) {
818        writer->pad  = writer->dst[0] & ~(0xFF << shift);
819    }
820}
821
822static void
823bwriter_add7( BWriter  writer, unsigned  value )
824{
825    writer->pad  |= (unsigned)(value << writer->bits);
826    writer->bits += 7;
827    if (writer->bits >= 8) {
828        writer->dst[0] = (byte_t)writer->pad;
829        writer->bits  -= 8;
830        writer->pad  >>= 8;
831        writer->dst   += 1;
832    }
833    writer->offset += 7;
834}
835
836static int
837bwriter_done( BWriter  writer )
838{
839    if (writer->bits > 0) {
840        writer->dst[0] = (byte_t)writer->pad;
841        writer->pad    = 0;
842        writer->bits   = 0;
843        writer->dst   += 1;
844    }
845    return writer->offset;
846}
847
848/* convert a utf8 string to a gsm7 byte string - return the number of septets written */
849int
850utf8_to_gsm7( cbytes_t  utf8, int  utf8len, bytes_t  dst, int offset )
851{
852    const unsigned char*  utf8end = utf8 + utf8len;
853    BWriterRec            writer[1];
854
855    if (dst == NULL)
856        return utf8_to_gsm7_count(utf8, utf8len);
857
858    bwriter_init( writer, dst, offset );
859    while ( utf8 < utf8end ) {
860        int  c = utf8_next( &utf8, utf8end );
861        int  nn;
862
863        if (c < 0)
864            break;
865
866        nn = unichar_to_gsm7(c);
867        if (nn >= 0) {
868            bwriter_add7( writer, nn );
869            continue;
870        }
871
872        nn = unichar_to_gsm7_extend(c);
873        if (nn >= 0) {
874            bwriter_add7( writer, GSM_7BITS_ESCAPE );
875            bwriter_add7( writer, nn );
876            continue;
877        }
878
879        /* unknown => replaced by space */
880        bwriter_add7( writer, 0x20 );
881    }
882    return  bwriter_done( writer );
883}
884
885
886int
887utf8_to_gsm8( cbytes_t  utf8, int  utf8len, bytes_t  dst )
888{
889    const unsigned char*  utf8end = utf8 + utf8len;
890    int                   result  = 0;
891
892    while ( utf8 < utf8end ) {
893        int  c = utf8_next( &utf8, utf8end );
894        int  nn;
895
896        if (c < 0)
897            break;
898
899        nn = unichar_to_gsm7(c);
900        if (nn >= 0) {
901            if (dst)
902                dst[result] = (byte_t)nn;
903            result += 1;
904            continue;
905        }
906
907        nn = unichar_to_gsm7_extend(c);
908        if (nn >= 0) {
909            if (dst) {
910                dst[result+0] = (byte_t) GSM_7BITS_ESCAPE;
911                dst[result+1] = (byte_t) nn;
912            }
913            result += 2;
914            continue;
915        }
916
917        /* unknown => space */
918        if (dst)
919            dst[result] = 0x20;
920        result += 1;
921    }
922    return  result;
923}
924
925
926int
927ucs2_to_gsm7( cbytes_t  ucs2, int  ucs2len, bytes_t  dst, int offset )
928{
929    const unsigned char*  ucs2end = ucs2 + ucs2len*2;
930    BWriterRec            writer[1];
931
932    bwriter_init( writer, dst, offset );
933    while ( ucs2 < ucs2end ) {
934        int  c = *ucs2++;
935        int  nn;
936
937        for (nn = 0; nn < 128; nn++) {
938            if ( gsm7bits_to_unicode[nn] == c ) {
939                bwriter_add7( writer, nn );
940                goto NextUnicode;
941            }
942        }
943        for (nn = 0; nn < 128; nn++) {
944            if ( gsm7bits_extend_to_unicode[nn] == c ) {
945                bwriter_add7( writer, GSM_7BITS_ESCAPE );
946                bwriter_add7( writer, nn );
947                goto NextUnicode;
948            }
949        }
950
951        /* unknown */
952        bwriter_add7( writer, 0x20 );
953
954    NextUnicode:
955        ;
956    }
957    return  bwriter_done( writer );
958}
959
960
961int
962ucs2_to_gsm8( cbytes_t  ucs2, int  ucs2len, bytes_t  dst )
963{
964    const unsigned char*  ucs2end = ucs2 + ucs2len*2;
965    bytes_t               dst0    = dst;
966
967    while ( ucs2 < ucs2end ) {
968        int  c = *ucs2++;
969        int  nn;
970
971        for (nn = 0; nn < 128; nn++) {
972            if ( gsm7bits_to_unicode[nn] == c ) {
973                *dst++ = (byte_t)nn;
974                goto NextUnicode;
975            }
976        }
977        for (nn = 0; nn < 128; nn++) {
978            if ( gsm7bits_extend_to_unicode[nn] == c ) {
979                dst[0] = (byte_t) GSM_7BITS_ESCAPE;
980                dst[1] = (byte_t) nn;
981                dst   += 2;
982                goto NextUnicode;
983            }
984        }
985
986        /* unknown */
987        *dst++ = 0x20;
988
989    NextUnicode:
990        ;
991    }
992    return (dst - dst0);
993}
994
995int
996gsm_bcdnum_to_ascii( cbytes_t  bcd, int  count, bytes_t  dst )
997{
998    int  result = 0;
999    int  shift  = 0;
1000
1001    while (count > 0) {
1002        int  c = (bcd[0] >> shift) & 0xf;
1003
1004        if (c == 15 && count == 1)  /* ignore trailing 0xf */
1005            break;
1006
1007        if (c >= 14)
1008            c = 0;
1009
1010        if (dst) dst[result] = "0123456789*#,N"[c];
1011        result += 1;
1012
1013        shift += 4;
1014        if (shift == 8) {
1015            shift = 0;
1016            bcd += 1;
1017        }
1018    }
1019    return  result;
1020}
1021
1022
1023int
1024gsm_bcdnum_from_ascii( cbytes_t  ascii, int  asciilen, bytes_t  dst )
1025{
1026    cbytes_t  end = ascii + asciilen;
1027    int  result   = 0;
1028    int  phase = 0x01;
1029
1030    while (ascii < end) {
1031        int  c = *ascii++;
1032
1033        if (c == '*')
1034            c = 10;
1035        else if (c == '#')
1036            c = 11;
1037        else if (c == ',')
1038            c = 12;
1039        else if (c == 'N')
1040            c = 13;
1041        else {
1042            c -= '0';
1043            if ((unsigned)c >= 10U)
1044                return -1;
1045        }
1046        phase   = (phase << 4) | c;
1047        result += 1;
1048        if (phase & 0x100) {
1049            if (dst) dst[result/2] = (byte_t) phase;
1050            phase   = 0x01;
1051        }
1052    }
1053
1054    if (result & 1) {
1055        if (dst) dst[result/2] = (byte_t)(phase | 0xf0);
1056    }
1057    return result;
1058}
1059
1060/** ADN: Abbreviated Dialing Number
1061 **/
1062
/* Number of bytes at the end of an ADN record that form its footer;
 * anything before the footer is the optional alpha identifier. */
#define  ADN_FOOTER_SIZE     14
/* Byte offsets of the fields, relative to the start of the footer */
#define  ADN_OFFSET_NUMBER_LENGTH   0
#define  ADN_OFFSET_TON_NPI         1
#define  ADN_OFFSET_NUMBER_START    2
#define  ADN_OFFSET_NUMBER_END      11
#define  ADN_OFFSET_CAPABILITY_ID   12
#define  ADN_OFFSET_EXTENSION_ID    13
1070
/* see 10.5.1 of 3GPP 51.011 */
/* Convert the alpha identifier stored in [alpha, end) to UTF-8.
 *
 * The first byte selects the source coding:
 *   0x80  the rest is UCS-2, two bytes per character
 *   0x81  a length byte and a one-byte base follow; see below
 *   0x82  a length byte and a two-byte base follow; see below
 *   other the whole field is unpacked GSM text
 * For 0x81/0x82, each following byte >= 0x80 stands for the character
 * (base + low 7 bits), and runs of bytes < 0x80 are GSM text.
 *
 * 'dst' may be NULL to only compute the size.
 * Returns the number of UTF-8 bytes produced; no NUL is appended.
 */
static int
sim_adn_alpha_to_utf8( cbytes_t  alpha, cbytes_t  end, bytes_t  dst )
{
    int  result = 0;

    /* ignore trailing 0xff */
    while (alpha < end && end[-1] == 0xff)
        end--;

    /* empty field, or nothing but 0xff padding */
    if (alpha >= end)
        return 0;

    if (alpha[0] == 0x80) { /* UCS/2 source encoding */
        alpha += 1;
        /* an odd trailing byte is dropped by the integer division */
        result = ucs2_to_utf8( alpha, (end-alpha)/2, dst );
    }
    else
    {
        int  is_ucs2 = 0;
        int  len = 0, base = 0;

        if (alpha+3 <= end && alpha[0] == 0x81) {
            is_ucs2 = 1;
            len     = alpha[1];
            /* the base byte supplies bits 7..14 of each character */
            base    = alpha[2] << 7;
            alpha  += 3;
            /* never read past the end of the field */
            if (len > end-alpha)
                len = end-alpha;
        } else if (alpha+4 <= end && alpha[0] == 0x82) {
            is_ucs2 = 1;
            len     = alpha[1];
            /* full 16-bit base, most significant byte first */
            base    = (alpha[2] << 8) | alpha[3];
            alpha  += 4;
            /* never read past the end of the field */
            if (len > end-alpha)
                len = end-alpha;
        }

        if (is_ucs2) {
            end = alpha + len;
            while (alpha < end) {
                int  c = alpha[0];
                if (c >= 0x80) {
                    /* offset from the base, taken from the low 7 bits */
                    result += utf8_write(dst, result, base + (c & 0x7f));
                    alpha  += 1;
                } else {
                    /* GSM character set */
                    int   count;
                    /* measure the run of consecutive bytes below 0x80 */
                    for (count = 0; alpha+count < end && alpha[count] < 128; count++)
                        ;
                    result += utf8_from_gsm8(alpha, count, (dst ? dst+result : NULL));
                    alpha  += count;
                }
            }
        }
        else {
            /* plain GSM text for the whole field */
            result = utf8_from_gsm8(alpha, end-alpha, dst);
        }
    }
    return result;
}
1132
#if 0
/* NOTE: this function is compiled out by the surrounding '#if 0'.
 *
 * Encode a UTF-8 string as an ADN alpha identifier: as unpacked GSM text
 * when every character is representable, otherwise as a 0x80 marker byte
 * followed by UCS-2.
 *
 * 'dst' may be NULL to only compute the size.
 * Returns the number of bytes produced.
 */
static int
sim_adn_alpha_from_utf8( cbytes_t  utf8, int  utf8len, bytes_t  dst )
{
    int   result = 0;

    if (utf8_check_gsm7(utf8, utf8len)) {
        /* GSM 7-bit compatible, encode directly as 8-bit string */
        result = utf8_to_gsm8(utf8, utf8len, dst);
    } else {
        /* otherwise, simply try UCS-2 encoding, nothing more serious at the moment */
        if (dst) {
            dst[0] = 0x80;
        }
        /* one marker byte plus two bytes per UCS-2 unit */
        result = 1 + utf8_to_ucs2(utf8, utf8len, dst ? (dst+1) : NULL)*2;
    }
    return  result;
}
#endif
1152
1153int
1154sim_adn_record_from_bytes( SimAdnRecord  rec, cbytes_t  data, int  len )
1155{
1156    cbytes_t  end    = data + len;
1157    cbytes_t  footer = end - ADN_FOOTER_SIZE;
1158    int       num_len;
1159
1160    rec->adn.alpha[0]  = 0;
1161    rec->adn.number[0] = 0;
1162    rec->ext_record    = 0xff;
1163
1164    if (len < ADN_FOOTER_SIZE)
1165        return -1;
1166
1167    /* alpha is optional */
1168    if (len > ADN_FOOTER_SIZE) {
1169        cbytes_t  dataend = data + len - ADN_FOOTER_SIZE;
1170        int       count   = sim_adn_alpha_to_utf8(data, dataend, NULL);
1171
1172        if (count > sizeof(rec->adn.alpha)-1)  /* too long */
1173            return -1;
1174
1175        sim_adn_alpha_to_utf8(data, dataend, rec->adn.alpha);
1176        rec->adn.alpha[count] = 0;
1177    }
1178
1179    num_len = footer[ADN_OFFSET_NUMBER_LENGTH];
1180    if (num_len > 11)
1181        return -1;
1182
1183    /* decode TON and number to ASCII, NOTE: this is lossy !! */
1184    {
1185        int      ton    = footer[ADN_OFFSET_TON_NPI];
1186        bytes_t  number = (bytes_t) rec->adn.number;
1187        int      len    = sizeof(rec->adn.number)-1;
1188        int      count;
1189
1190        if (ton != 0x81 && ton != 0x91)
1191            return -1;
1192
1193        if (ton == 0x91) {
1194            *number++ = '+';
1195            len      -= 1;
1196        }
1197
1198        count = gsm_bcdnum_to_ascii( footer + ADN_OFFSET_NUMBER_START,
1199                                     num_len*2, number );
1200        number[count] = 0;
1201    }
1202    return 0;
1203}
1204
1205int
1206sim_adn_record_to_bytes( SimAdnRecord  rec, bytes_t   data, int  datalen )
1207{
1208    bytes_t   end    = data + datalen;
1209    bytes_t   footer = end - ADN_FOOTER_SIZE;
1210    int       ton    = 0x81;
1211    cbytes_t  number = (cbytes_t) rec->adn.number;
1212
1213    if (number[0] == '+') {
1214        ton     = 0x91;
1215        number += 1;
1216    }
1217    footer[0] = (strlen((const char*)number)+1)/2 + 1;
1218    /* XXXX: TODO */
1219    return 0;
1220}
1221