/* gsm.c revision 55f4e4a5ec657a017e3bf75299ad71fd1c968dd3 */
1/* Copyright (C) 2007-2008 The Android Open Source Project 2** 3** This software is licensed under the terms of the GNU General Public 4** License version 2, as published by the Free Software Foundation, and 5** may be copied, distributed, and modified under those terms. 6** 7** This program is distributed in the hope that it will be useful, 8** but WITHOUT ANY WARRANTY; without even the implied warranty of 9** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10** GNU General Public License for more details. 11*/ 12#include "gsm.h" 13#include <stdlib.h> 14#include <string.h> 15 16/** UTILITIES 17 **/ 18byte_t 19gsm_int_to_bcdi( int value ) 20{ 21 return (byte_t)((value / 10) | ((value % 10) << 4)); 22} 23 24int 25gsm_int_from_bcdi( byte_t val ) 26{ 27 int ret = 0; 28 29 if ((val & 0xf0) <= 0x90) 30 ret = (val >> 4); 31 32 if ((val & 0x0f) <= 0x90) 33 ret |= (val % 0xf)*10; 34 35 return ret; 36} 37 38 39static int 40gsm_bcdi_to_ascii( cbytes_t bcd, int bcdlen, bytes_t dst ) 41{ 42 static byte_t bcdichars[14] = "0123456789*#,N"; 43 44 int result = 0; 45 int shift = 0; 46 47 while (bcdlen > 0) { 48 int c = (bcd[0] >> shift) & 0xf; 49 50 if (c == 0xf && bcdlen == 1) 51 break; 52 53 if (c < 14) { 54 if (dst) dst[result] = bcdichars[c]; 55 result += 1; 56 } 57 bcdlen --; 58 shift += 4; 59 if (shift == 8) { 60 bcd++; 61 shift = 0; 62 } 63 } 64 return result; 65} 66 67 68static int 69gsm_bcdi_from_ascii( cbytes_t ascii, int asciilen, bytes_t dst ) 70{ 71 cbytes_t end = ascii + asciilen; 72 int result = 0; 73 int phase = 0x01; 74 75 while (ascii < end) { 76 int c = *ascii++; 77 78 if (c == '*') 79 c = 11; 80 else if (c == '#') 81 c = 12; 82 else if (c == ',') 83 c = 13; 84 else if (c == 'N') 85 c = 14; 86 else { 87 c -= '0'; 88 if ((unsigned)c >= 10) 89 break; 90 } 91 phase = (phase << 4) | c; 92 if (phase & 0x100) { 93 if (dst) dst[result] = (byte_t) phase; 94 result += 1; 95 phase = 0x01; 96 } 97 } 98 if (phase != 0x01) { 99 if (dst) dst[result] = (byte_t)( 
phase | 0xf0 ); 100 result += 1; 101 } 102 return result; 103} 104 105 106int 107gsm_hexchar_to_int( char c ) 108{ 109 if ((unsigned)(c - '0') < 10) 110 return c - '0'; 111 if ((unsigned)(c - 'a') < 6) 112 return 10 + (c - 'a'); 113 if ((unsigned)(c - 'A') < 6) 114 return 10 + (c - 'A'); 115 return -1; 116} 117 118int 119gsm_hexchar_to_int0( char c ) 120{ 121 int ret = gsm_hexchar_to_int(c); 122 123 return (ret < 0) ? 0 : ret; 124} 125 126int 127gsm_hex2_to_byte( const char* hex ) 128{ 129 int hi = gsm_hexchar_to_int(hex[0]); 130 int lo = gsm_hexchar_to_int(hex[1]); 131 132 if (hi < 0 || lo < 0) 133 return -1; 134 135 return ( (hi << 4) | lo ); 136} 137 138int 139gsm_hex4_to_short( const char* hex ) 140{ 141 int hi = gsm_hex2_to_byte(hex); 142 int lo = gsm_hex2_to_byte(hex+2); 143 144 if (hi < 0 || lo < 0) 145 return -1; 146 147 return ((hi << 8) | lo); 148} 149 150int 151gsm_hex2_to_byte0( const char* hex ) 152{ 153 int hi = gsm_hexchar_to_int0(hex[0]); 154 int lo = gsm_hexchar_to_int0(hex[1]); 155 156 return (byte_t)( (hi << 4) | lo ); 157} 158 159void 160gsm_hex_from_byte( char* hex, int val ) 161{ 162 static const char hexdigits[] = "0123456789abcdef"; 163 164 hex[0] = hexdigits[(val >> 4) & 15]; 165 hex[1] = hexdigits[val & 15]; 166} 167 168void 169gsm_hex_from_short( char* hex, int val ) 170{ 171 gsm_hex_from_byte( hex, (val >> 8) ); 172 gsm_hex_from_byte( hex+2, val ); 173} 174 175 176 177/** HEX 178 **/ 179void 180gsm_hex_to_bytes0( cbytes_t hex, int hexlen, bytes_t dst ) 181{ 182 int nn; 183 184 for (nn = 0; nn < hexlen/2; nn++ ) { 185 dst[nn] = (byte_t) gsm_hex2_to_byte0( (const char*)hex+2*nn ); 186 } 187 if (hexlen & 1) { 188 dst[nn] = gsm_hexchar_to_int0( hex[2*nn] ) << 4; 189 } 190} 191 192int 193gsm_hex_to_bytes( cbytes_t hex, int hexlen, bytes_t dst ) 194{ 195 int nn; 196 197 if (hexlen & 1) /* must be even */ 198 return -1; 199 200 for (nn = 0; nn < hexlen/2; nn++ ) { 201 int c = gsm_hex2_to_byte( (const char*)hex+2*nn ); 202 if (c < 0) return -1; 
203 dst[nn] = (byte_t) c; 204 } 205 return hexlen/2; 206} 207 208void 209gsm_hex_from_bytes( char* hex, cbytes_t src, int srclen ) 210{ 211 int nn; 212 213 for (nn = 0; nn < srclen; nn++) { 214 gsm_hex_from_byte( hex + 2*nn, src[nn] ); 215 } 216} 217 218/** ROPES 219 **/ 220 221void 222gsm_rope_init( GsmRope rope ) 223{ 224 rope->data = NULL; 225 rope->pos = 0; 226 rope->max = 0; 227 rope->error = 0; 228} 229 230void 231gsm_rope_init_alloc( GsmRope rope, int count ) 232{ 233 rope->data = rope->data0; 234 rope->pos = 0; 235 rope->max = sizeof(rope->data0); 236 rope->error = 0; 237 238 if (count > 0) { 239 rope->data = calloc( count, 1 ); 240 rope->max = count; 241 242 if (rope->data == NULL) { 243 rope->error = 1; 244 rope->max = 0; 245 } 246 } 247} 248 249int 250gsm_rope_done( GsmRope rope ) 251{ 252 int result = rope->error; 253 254 if (rope->data && rope->data != rope->data0) 255 free(rope->data); 256 257 rope->data = NULL; 258 rope->pos = 0; 259 rope->max = 0; 260 rope->error = 0; 261 262 return result; 263} 264 265 266bytes_t 267gsm_rope_done_acquire( GsmRope rope, int *psize ) 268{ 269 bytes_t result = rope->data; 270 271 *psize = rope->pos; 272 if (result == rope->data0) { 273 result = malloc( rope->pos ); 274 if (result != NULL) 275 memcpy( result, rope->data, rope->pos ); 276 } 277 return result; 278} 279 280 281int 282gsm_rope_ensure( GsmRope rope, int new_count ) 283{ 284 if (rope->data != NULL) { 285 int old_max = rope->max; 286 bytes_t old_data = rope->data == rope->data0 ? 
NULL : rope->data; 287 int new_max = old_max; 288 bytes_t new_data; 289 290 while (new_max < new_count) { 291 new_max += (new_max >> 1) + 4; 292 } 293 new_data = realloc( old_data, new_max ); 294 if (new_data == NULL) { 295 rope->error = 1; 296 return -1; 297 } 298 rope->data = new_data; 299 rope->max = new_max; 300 } else { 301 rope->max = new_count; 302 } 303 return 0; 304} 305 306static int 307gsm_rope_can_grow( GsmRope rope, int count ) 308{ 309 if (!rope->data || rope->error) 310 return 0; 311 312 if (rope->pos + count > rope->max) 313 { 314 if (rope->data == NULL) 315 rope->max = rope->pos + count; 316 317 else if (rope->error || 318 gsm_rope_ensure( rope, rope->pos + count ) < 0) 319 return 0; 320 } 321 return 1; 322} 323 324void 325gsm_rope_add_c( GsmRope rope, char c ) 326{ 327 if (gsm_rope_can_grow(rope, 1)) { 328 rope->data[ rope->pos ] = (byte_t) c; 329 } 330 rope->pos += 1; 331} 332 333void 334gsm_rope_add( GsmRope rope, const void* buf, int buflen ) 335{ 336 if (gsm_rope_can_grow(rope, buflen)) { 337 memcpy( rope->data + rope->pos, (const char*)buf, buflen ); 338 } 339 rope->pos += buflen; 340} 341 342void* 343gsm_rope_reserve( GsmRope rope, int count ) 344{ 345 void* result = NULL; 346 347 if (gsm_rope_can_grow(rope, count)) 348 { 349 if (rope->data != NULL) 350 result = rope->data + rope->pos; 351 } 352 rope->pos += count; 353 354 return result; 355} 356 357/* skip a given number of Unicode characters in a utf-8 byte string */ 358cbytes_t 359utf8_skip( cbytes_t utf8, 360 cbytes_t utf8end, 361 int count) 362{ 363 cbytes_t p = utf8; 364 cbytes_t end = utf8end; 365 366 for ( ; count > 0; count-- ) { 367 int c; 368 369 if (p >= end) 370 break; 371 372 c = *p++; 373 if (c > 128) { 374 while (p < end && (p[0] & 0xc0) == 0x80) 375 p++; 376 } 377 } 378 return p; 379} 380 381 382static __inline__ int 383utf8_next( cbytes_t *pp, cbytes_t end ) 384{ 385 cbytes_t p = *pp; 386 int result = -1; 387 388 if (p < end) { 389 int c= *p++; 390 if (c >= 128) { 391 if 
((c & 0xe0) == 0xc0) 392 c &= 0x1f; 393 else if ((c & 0xf0) == 0xe0) 394 c &= 0x0f; 395 else 396 c &= 0x07; 397 398 while (p < end && (p[0] & 0xc0) == 0x80) { 399 c = (c << 6) | (p[0] & 0x3f); 400 p ++; 401 } 402 } 403 result = c; 404 *pp = p; 405 } 406 return result; 407} 408 409 410__inline__ int 411utf8_write( bytes_t utf8, int offset, int v ) 412{ 413 int result; 414 415 if (v < 128) { 416 result = 1; 417 if (utf8) 418 utf8[offset] = (byte_t) v; 419 } else if (v < 0x800) { 420 result = 2; 421 if (utf8) { 422 utf8[offset+0] = (byte_t)( 0xc0 | (v >> 6) ); 423 utf8[offset+1] = (byte_t)( 0x80 | (v & 0x3f) ); 424 } 425 } else if (v < 0x10000) { 426 result = 3; 427 if (utf8) { 428 utf8[offset+0] = (byte_t)( 0xe0 | (v >> 12) ); 429 utf8[offset+1] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) ); 430 utf8[offset+2] = (byte_t)( 0x80 | (v & 0x3f) ); 431 } 432 } else { 433 result = 4; 434 if (utf8) { 435 utf8[offset+0] = (byte_t)( 0xf0 | ((v >> 18) & 0x7) ); 436 utf8[offset+1] = (byte_t)( 0x80 | ((v >> 12) & 0x3f) ); 437 utf8[offset+2] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) ); 438 utf8[offset+3] = (byte_t)( 0x80 | (v & 0x3f) ); 439 } 440 } 441 return result; 442} 443 444static __inline__ int 445ucs2_write( bytes_t ucs2, int offset, int v ) 446{ 447 if (ucs2) { 448 ucs2[offset+0] = (byte_t) (v >> 8); 449 ucs2[offset+1] = (byte_t) (v); 450 } 451 return 2; 452} 453 454int 455utf8_check( cbytes_t p, int utf8len ) 456{ 457 cbytes_t end = p + utf8len; 458 int result = 0; 459 460 if (p) { 461 while (p < end) { 462 int c = *p++; 463 if (c >= 128) { 464 int len; 465 if ((c & 0xe0) == 0xc0) { 466 len = 1; 467 } 468 else if ((c & 0xf0) == 0xe0) { 469 len = 2; 470 } 471 else if ((c & 0xf8) == 0xf0) { 472 len = 3; 473 } 474 else 475 goto Exit; /* malformed utf-8 */ 476 477 if (p+len > end) /* string too short */ 478 goto Exit; 479 480 for ( ; len > 0; len--, p++ ) { 481 if ((p[0] & 0xc0) != 0x80) 482 goto Exit; 483 } 484 } 485 } 486 result = 1; 487 } 488Exit: 489 return result; 490} 491 492/** 
UCS2 to UTF8 493 **/ 494 495/* convert a UCS2 string into a UTF8 byte string, assumes 'buf' is correctly sized */ 496int 497ucs2_to_utf8( cbytes_t ucs2, 498 int ucs2len, 499 bytes_t buf ) 500{ 501 int nn; 502 int result = 0; 503 504 for (nn = 0; nn < ucs2len; ucs2 += 2, nn++) { 505 int c= (ucs2[0] << 8) | ucs2[1]; 506 result += utf8_write(buf, result, c); 507 } 508 return result; 509} 510 511/* count the number of UCS2 chars contained in a utf8 byte string */ 512int 513utf8_to_ucs2( cbytes_t utf8, 514 int utf8len, 515 bytes_t ucs2 ) 516{ 517 cbytes_t p = utf8; 518 cbytes_t end = p + utf8len; 519 int result = 0; 520 521 while (p < end) { 522 int c = utf8_next(&p, end); 523 524 if (c < 0) 525 break; 526 527 result += ucs2_write(ucs2, result, c); 528 } 529 return result/2; 530} 531 532 533 534/** GSM ALPHABET 535 **/ 536 537#define GSM_7BITS_ESCAPE 0x1b 538#define GSM_7BITS_UNKNOWN 0 539 540static const unsigned short gsm7bits_to_unicode[128] = { 541 '@', 0xa3, '$', 0xa5, 0xe8, 0xe9, 0xf9, 0xec, 0xf2, 0xc7, '\n', 0xd8, 0xf8, '\r', 0xc5, 0xe5, 5420x394, '_',0x3a6,0x393,0x39b,0x3a9,0x3a0,0x3a8,0x3a3,0x398,0x39e, 0, 0xc6, 0xe6, 0xdf, 0xc9, 543 ' ', '!', '"', '#', 0xa4, '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', 544 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', 545 0xa1, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 546 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 0xc4, 0xd6,0x147, 0xdc, 0xa7, 547 0xbf, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 548 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0xe4, 0xf6, 0xf1, 0xfc, 0xe0, 549}; 550 551static const unsigned short gsm7bits_extend_to_unicode[128] = { 552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\f', 0, 0, 0, 0, 0, 553 0, 0, 0, 0, '^', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 554 0, 0, 0, 0, 0, 0, 0, 0, '{', '}', 0, 0, 0, 0, 0,'\\', 555 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '[', '~', ']', 0, 556 '|', 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 558 0, 0, 0, 0, 0,0x20ac, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 560}; 561 562 563static int 564unichar_to_gsm7( int unicode ) 565{ 566 int nn; 567 for (nn = 0; nn < 128; nn++) { 568 if (gsm7bits_to_unicode[nn] == unicode) { 569 return nn; 570 } 571 } 572 return -1; 573} 574 575static int 576unichar_to_gsm7_extend( int unichar ) 577{ 578 int nn; 579 for (nn = 0; nn < 128; nn++) { 580 if (gsm7bits_extend_to_unicode[nn] == unichar) { 581 return nn; 582 } 583 } 584 return -1; 585} 586 587 588/* return the number of septets needed to encode a unicode charcode */ 589static int 590unichar_to_gsm7_count( int unicode ) 591{ 592 int nn; 593 594 nn = unichar_to_gsm7(unicode); 595 if (nn >= 0) 596 return 1; 597 598 nn = unichar_to_gsm7_extend(unicode); 599 if (nn >= 0) 600 return 2; 601 602 return 0; 603} 604 605 606cbytes_t 607utf8_skip_gsm7( cbytes_t utf8, cbytes_t utf8end, int gsm7len ) 608{ 609 cbytes_t p = utf8; 610 cbytes_t end = utf8end; 611 612 while (gsm7len >0) { 613 cbytes_t q = p; 614 int c = utf8_next( &q, end ); 615 int len; 616 617 if (c < 0) 618 break; 619 620 len = unichar_to_gsm7_count( c ); 621 if (len == 0) /* unknown chars are replaced by spaces */ 622 len = 1; 623 624 if (len > gsm7len) 625 break; 626 627 gsm7len -= len; 628 p = q; 629 } 630 return p; 631} 632 633 634int 635utf8_check_gsm7( cbytes_t utf8, 636 int utf8len ) 637{ 638 cbytes_t utf8end = utf8 + utf8len; 639 640 while (utf8 < utf8end) { 641 int c = utf8_next( &utf8, utf8end ); 642 if (unichar_to_gsm7_count(c) == 0) 643 return 0; 644 } 645 return 1; 646} 647 648 649int 650utf8_from_gsm7( cbytes_t src, 651 int septet_offset, 652 int septet_count, 653 bytes_t utf8 ) 654{ 655 int shift = (septet_offset & 7); 656 int escaped = 0; 657 int result = 0; 658 659 src += (septet_offset >> 3); 660 for ( ; septet_count > 0; septet_count-- ) 661 { 662 int c = (src[0] >> shift) & 0x7f; 
663 int v; 664 665 if (shift > 1) { 666 c = ((src[1] << (8-shift)) | c) & 0x7f; 667 } 668 669 if (escaped) { 670 v = gsm7bits_extend_to_unicode[c]; 671 } else if (c == GSM_7BITS_ESCAPE) { 672 escaped = 1; 673 goto NextSeptet; 674 } else { 675 v = gsm7bits_to_unicode[c]; 676 } 677 678 result += utf8_write( utf8, result, v ); 679 680 NextSeptet: 681 shift += 7; 682 if (shift >= 8) { 683 shift -= 8; 684 src += 1; 685 } 686 } 687 return result; 688} 689 690 691int 692utf8_from_gsm8( cbytes_t src, int count, bytes_t utf8 ) 693{ 694 int result = 0; 695 int escaped = 0; 696 697 698 for ( ; count > 0; count-- ) 699 { 700 int c = *src++; 701 702 if (c == 0xff) 703 break; 704 705 if (c == GSM_7BITS_ESCAPE) { 706 if (escaped) { /* two escape characters => one space */ 707 c = 0x20; 708 escaped = 0; 709 } else { 710 escaped = 1; 711 continue; 712 } 713 } 714 else 715 { 716 if (c >= 0x80) { 717 c = 0x20; 718 escaped = 0; 719 } else if (escaped) { 720 c = gsm7bits_extend_to_unicode[c]; 721 } else 722 c = gsm7bits_to_unicode[c]; 723 } 724 725 result += utf8_write( utf8, result, c ); 726 } 727 return result; 728} 729 730/* convert a GSM 7-bit message into a unicode character array 731 * the 'dst' array must contain at least 160 chars. 
the function 732 * returns the number of characters decoded 733 * 734 * assumes the 'dst' array has at least septet_count items, returns the 735 * number of unichars really written 736 */ 737int 738ucs2_from_gsm7( bytes_t ucs2, 739 cbytes_t src, 740 int septet_offset, 741 int septet_count ) 742{ 743 const unsigned char* p = src + (septet_offset >> 3); 744 int shift = (septet_offset & 7); 745 int escaped = 0; 746 int result = 0; 747 748 for ( ; septet_count > 0; septet_count-- ) 749 { 750 unsigned val = (p[0] >> shift) & 0x7f; 751 752 if (shift > 1) 753 val = (val | (p[1] << (8-shift))) & 0x7f; 754 755 if (escaped) { 756 int c = gsm7bits_to_unicode[val]; 757 758 result += ucs2_write(ucs2, result, c); 759 escaped = 0; 760 } 761 else if (val == GSM_7BITS_ESCAPE) { 762 escaped = 1; 763 } 764 else { 765 val = gsm7bits_extend_to_unicode[val]; 766 if (val == 0) 767 val = 0x20; 768 769 result += ucs2_write( ucs2, result, val ); 770 } 771 } 772 return result/2; 773} 774 775 776/* count the number of septets required to write a utf8 string */ 777static int 778utf8_to_gsm7_count( cbytes_t utf8, int utf8len ) 779{ 780 cbytes_t utf8end = utf8 + utf8len; 781 int result = 0; 782 783 while ( utf8 < utf8end ) { 784 int len; 785 int c = utf8_next( &utf8, utf8end ); 786 787 if (c < 0) 788 break; 789 790 len = unichar_to_gsm7_count(c); 791 if (len == 0) /* replace non-representables with space */ 792 len = 1; 793 794 result += len; 795 } 796 return result; 797} 798 799typedef struct { 800 bytes_t dst; 801 unsigned pad; 802 int bits; 803 int offset; 804} BWriterRec, *BWriter; 805 806static void 807bwriter_init( BWriter writer, bytes_t dst, int start ) 808{ 809 int shift = start & 7; 810 811 writer->dst = dst + (start >> 3); 812 writer->pad = 0; 813 writer->bits = shift; 814 writer->offset = start; 815 816 if (shift > 0) { 817 writer->pad = writer->dst[0] & ~(0xFF << shift); 818 } 819} 820 821static void 822bwriter_add7( BWriter writer, unsigned value ) 823{ 824 writer->pad |= 
(unsigned)(value << writer->bits); 825 writer->bits += 7; 826 if (writer->bits >= 8) { 827 writer->dst[0] = (byte_t)writer->pad; 828 writer->bits -= 8; 829 writer->pad >>= 8; 830 writer->dst += 1; 831 } 832 writer->offset += 7; 833} 834 835static int 836bwriter_done( BWriter writer ) 837{ 838 if (writer->bits > 0) { 839 writer->dst[0] = (byte_t)writer->pad; 840 writer->pad = 0; 841 writer->bits = 0; 842 writer->dst += 1; 843 } 844 return writer->offset; 845} 846 847/* convert a utf8 string to a gsm7 byte string - return the number of septets written */ 848int 849utf8_to_gsm7( cbytes_t utf8, int utf8len, bytes_t dst, int offset ) 850{ 851 const unsigned char* utf8end = utf8 + utf8len; 852 BWriterRec writer[1]; 853 854 if (dst == NULL) 855 return utf8_to_gsm7_count(utf8, utf8len); 856 857 bwriter_init( writer, dst, offset ); 858 while ( utf8 < utf8end ) { 859 int c = utf8_next( &utf8, utf8end ); 860 int nn; 861 862 if (c < 0) 863 break; 864 865 nn = unichar_to_gsm7(c); 866 if (nn >= 0) { 867 bwriter_add7( writer, nn ); 868 continue; 869 } 870 871 nn = unichar_to_gsm7_extend(c); 872 if (nn >= 0) { 873 bwriter_add7( writer, GSM_7BITS_ESCAPE ); 874 bwriter_add7( writer, nn ); 875 continue; 876 } 877 878 /* unknown => replaced by space */ 879 bwriter_add7( writer, 0x20 ); 880 } 881 return bwriter_done( writer ); 882} 883 884 885int 886utf8_to_gsm8( cbytes_t utf8, int utf8len, bytes_t dst ) 887{ 888 const unsigned char* utf8end = utf8 + utf8len; 889 int result = 0; 890 891 while ( utf8 < utf8end ) { 892 int c = utf8_next( &utf8, utf8end ); 893 int nn; 894 895 if (c < 0) 896 break; 897 898 nn = unichar_to_gsm7(c); 899 if (nn >= 0) { 900 if (dst) 901 dst[result] = (byte_t)nn; 902 result += 1; 903 continue; 904 } 905 906 nn = unichar_to_gsm7_extend(c); 907 if (nn >= 0) { 908 if (dst) { 909 dst[result+0] = (byte_t) GSM_7BITS_ESCAPE; 910 dst[result+1] = (byte_t) nn; 911 } 912 result += 2; 913 continue; 914 } 915 916 /* unknown => space */ 917 if (dst) 918 dst[result] = 0x20; 
919 result += 1; 920 } 921 return result; 922} 923 924 925int 926ucs2_to_gsm7( cbytes_t ucs2, int ucs2len, bytes_t dst, int offset ) 927{ 928 const unsigned char* ucs2end = ucs2 + ucs2len*2; 929 BWriterRec writer[1]; 930 931 bwriter_init( writer, dst, offset ); 932 while ( ucs2 < ucs2end ) { 933 int c = *ucs2++; 934 int nn; 935 936 for (nn = 0; nn < 128; nn++) { 937 if ( gsm7bits_to_unicode[nn] == c ) { 938 bwriter_add7( writer, nn ); 939 goto NextUnicode; 940 } 941 } 942 for (nn = 0; nn < 128; nn++) { 943 if ( gsm7bits_extend_to_unicode[nn] == c ) { 944 bwriter_add7( writer, GSM_7BITS_ESCAPE ); 945 bwriter_add7( writer, nn ); 946 goto NextUnicode; 947 } 948 } 949 950 /* unknown */ 951 bwriter_add7( writer, 0x20 ); 952 953 NextUnicode: 954 ; 955 } 956 return bwriter_done( writer ); 957} 958 959 960int 961ucs2_to_gsm8( cbytes_t ucs2, int ucs2len, bytes_t dst ) 962{ 963 const unsigned char* ucs2end = ucs2 + ucs2len*2; 964 bytes_t dst0 = dst; 965 966 while ( ucs2 < ucs2end ) { 967 int c = *ucs2++; 968 int nn; 969 970 for (nn = 0; nn < 128; nn++) { 971 if ( gsm7bits_to_unicode[nn] == c ) { 972 *dst++ = (byte_t)nn; 973 goto NextUnicode; 974 } 975 } 976 for (nn = 0; nn < 128; nn++) { 977 if ( gsm7bits_extend_to_unicode[nn] == c ) { 978 dst[0] = (byte_t) GSM_7BITS_ESCAPE; 979 dst[1] = (byte_t) nn; 980 dst += 2; 981 goto NextUnicode; 982 } 983 } 984 985 /* unknown */ 986 *dst++ = 0x20; 987 988 NextUnicode: 989 ; 990 } 991 return (dst - dst0); 992} 993 994int 995gsm_bcdnum_to_ascii( cbytes_t bcd, int count, bytes_t dst ) 996{ 997 int result = 0; 998 int shift = 0; 999 1000 while (count > 0) { 1001 int c = (bcd[0] >> shift) & 0xf; 1002 1003 if (c == 15 && count == 1) /* ignore trailing 0xf */ 1004 break; 1005 1006 if (c >= 14) 1007 c = 0; 1008 1009 if (dst) dst[result] = "0123456789*#,N"[c]; 1010 result += 1; 1011 1012 shift += 4; 1013 if (shift == 8) { 1014 shift = 0; 1015 bcd += 1; 1016 } 1017 } 1018 return result; 1019} 1020 1021 1022int 1023gsm_bcdnum_from_ascii( 
cbytes_t ascii, int asciilen, bytes_t dst ) 1024{ 1025 cbytes_t end = ascii + asciilen; 1026 int result = 0; 1027 int phase = 0x01; 1028 1029 while (ascii < end) { 1030 int c = *ascii++; 1031 1032 if (c == '*') 1033 c = 10; 1034 else if (c == '#') 1035 c = 11; 1036 else if (c == ',') 1037 c = 12; 1038 else if (c == 'N') 1039 c = 13; 1040 else { 1041 c -= '0'; 1042 if ((unsigned)c >= 10U) 1043 return -1; 1044 } 1045 phase = (phase << 4) | c; 1046 result += 1; 1047 if (phase & 0x100) { 1048 if (dst) dst[result/2] = (byte_t) phase; 1049 phase = 0x01; 1050 } 1051 } 1052 1053 if (result & 1) { 1054 if (dst) dst[result/2] = (byte_t)(phase | 0xf0); 1055 } 1056 return result; 1057} 1058 1059/** ADN: Abbreviated Dialing Number 1060 **/ 1061 1062#define ADN_FOOTER_SIZE 14 1063#define ADN_OFFSET_NUMBER_LENGTH 0 1064#define ADN_OFFSET_TON_NPI 1 1065#define ADN_OFFSET_NUMBER_START 2 1066#define ADN_OFFSET_NUMBER_END 11 1067#define ADN_OFFSET_CAPABILITY_ID 12 1068#define ADN_OFFSET_EXTENSION_ID 13 1069 1070/* see 10.5.1 of 3GPP 51.011 */ 1071static int 1072sim_adn_alpha_to_utf8( cbytes_t alpha, cbytes_t end, bytes_t dst ) 1073{ 1074 int result = 0; 1075 1076 /* ignore trailing 0xff */ 1077 while (alpha < end && end[-1] == 0xff) 1078 end--; 1079 1080 if (alpha >= end) 1081 return 0; 1082 1083 if (alpha[0] == 0x80) { /* UCS/2 source encoding */ 1084 alpha += 1; 1085 result = ucs2_to_utf8( alpha, (end-alpha)/2, dst ); 1086 } 1087 else 1088 { 1089 int is_ucs2 = 0; 1090 int len = 0, base = 0; 1091 1092 if (alpha+3 <= end && alpha[0] == 0x81) { 1093 is_ucs2 = 1; 1094 len = alpha[1]; 1095 base = alpha[2] << 7; 1096 alpha += 3; 1097 if (len > end-alpha) 1098 len = end-alpha; 1099 } else if (alpha+4 <= end && alpha[0] == 0x82) { 1100 is_ucs2 = 1; 1101 len = alpha[1]; 1102 base = (alpha[2] << 8) | alpha[3]; 1103 alpha += 4; 1104 if (len > end-alpha) 1105 len = end-alpha; 1106 } 1107 1108 if (is_ucs2) { 1109 end = alpha + len; 1110 while (alpha < end) { 1111 int c = alpha[0]; 1112 if (c >= 
0x80) { 1113 result += utf8_write(dst, result, base + (c & 0x7f)); 1114 alpha += 1; 1115 } else { 1116 /* GSM character set */ 1117 int count; 1118 for (count = 0; alpha+count < end && alpha[count] < 128; count++) 1119 ; 1120 result += utf8_from_gsm8(alpha, count, (dst ? dst+result : NULL)); 1121 alpha += count; 1122 } 1123 } 1124 } 1125 else { 1126 result = utf8_from_gsm8(alpha, end-alpha, dst); 1127 } 1128 } 1129 return result; 1130} 1131 1132static int 1133sim_adn_alpha_from_utf8( cbytes_t utf8, int utf8len, bytes_t dst ) 1134{ 1135 int result = 0; 1136 1137 if (utf8_check_gsm7(utf8, utf8len)) { 1138 /* GSM 7-bit compatible, encode directly as 8-bit string */ 1139 result = utf8_to_gsm8(utf8, utf8len, dst); 1140 } else { 1141 /* otherwise, simply try UCS-2 encoding, nothing more serious at the moment */ 1142 if (dst) { 1143 dst[0] = 0x80; 1144 } 1145 result = 1 + utf8_to_ucs2(utf8, utf8len, dst ? (dst+1) : NULL)*2; 1146 } 1147 return result; 1148} 1149 1150int 1151sim_adn_record_from_bytes( SimAdnRecord rec, cbytes_t data, int len ) 1152{ 1153 cbytes_t end = data + len; 1154 cbytes_t footer = end - ADN_FOOTER_SIZE; 1155 int num_len; 1156 1157 rec->adn.alpha[0] = 0; 1158 rec->adn.number[0] = 0; 1159 rec->ext_record = 0xff; 1160 1161 if (len < ADN_FOOTER_SIZE) 1162 return -1; 1163 1164 /* alpha is optional */ 1165 if (len > ADN_FOOTER_SIZE) { 1166 cbytes_t dataend = data + len - ADN_FOOTER_SIZE; 1167 int count = sim_adn_alpha_to_utf8(data, dataend, NULL); 1168 1169 if (count > sizeof(rec->adn.alpha)-1) /* too long */ 1170 return -1; 1171 1172 sim_adn_alpha_to_utf8(data, dataend, rec->adn.alpha); 1173 rec->adn.alpha[count] = 0; 1174 } 1175 1176 num_len = footer[ADN_OFFSET_NUMBER_LENGTH]; 1177 if (num_len > 11) 1178 return -1; 1179 1180 /* decode TON and number to ASCII, NOTE: this is lossy !! 
*/ 1181 { 1182 int ton = footer[ADN_OFFSET_TON_NPI]; 1183 bytes_t number = (bytes_t) rec->adn.number; 1184 int len = sizeof(rec->adn.number)-1; 1185 int count; 1186 1187 if (ton != 0x81 && ton != 0x91) 1188 return -1; 1189 1190 if (ton == 0x91) { 1191 *number++ = '+'; 1192 len -= 1; 1193 } 1194 1195 count = gsm_bcdnum_to_ascii( footer + ADN_OFFSET_NUMBER_START, 1196 num_len*2, number ); 1197 number[count] = 0; 1198 } 1199 return 0; 1200} 1201 1202int 1203sim_adn_record_to_bytes( SimAdnRecord rec, bytes_t data, int datalen ) 1204{ 1205 bytes_t end = data + datalen; 1206 bytes_t footer = end - ADN_FOOTER_SIZE; 1207 int ton = 0x81; 1208 cbytes_t number = (cbytes_t) rec->adn.number; 1209 1210 if (number[0] == '+') { 1211 ton = 0x91; 1212 number += 1; 1213 } 1214 footer[0] = (strlen((const char*)number)+1)/2 + 1; 1215 /* XXXX: TODO */ 1216 return 0; 1217} 1218