// GsmAlphabet.java revision 17f616823a562ceb3a008f91e05d43bc56d37cae
1/* 2 * Copyright (C) 2006 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.internal.telephony; 18 19import android.telephony.SmsMessage; 20import android.util.SparseIntArray; 21 22import android.util.Log; 23 24/** 25 * This class implements the character set mapping between 26 * the GSM SMS 7-bit alphabet specified in TS 23.038 6.2.1 27 * and UTF-16 28 * 29 * {@hide} 30 */ 31public class GsmAlphabet { 32 static final String LOG_TAG = "GSM"; 33 34 35 36 //***** Constants 37 38 /** 39 * This escapes extended characters, and when present indicates that the 40 * following character should 41 * be looked up in the "extended" table 42 * 43 * gsmToChar(GSM_EXTENDED_ESCAPE) returns 0xffff 44 */ 45 46 public static final byte GSM_EXTENDED_ESCAPE = 0x1B; 47 48 49 /** 50 * char to GSM alphabet char 51 * Returns ' ' in GSM alphabet if there's no possible match 52 * Returns GSM_EXTENDED_ESCAPE if this character is in the extended table 53 * In this case, you must call charToGsmExtended() for the value that 54 * should follow GSM_EXTENDED_ESCAPE in the GSM alphabet string 55 */ 56 public static int 57 charToGsm(char c) { 58 try { 59 return charToGsm(c, false); 60 } catch (EncodeException ex) { 61 // this should never happen 62 return sGsmSpaceChar; 63 } 64 } 65 66 /** 67 * char to GSM alphabet char 68 * @param throwException If true, throws EncodeException on invalid char. 
69 * If false, returns GSM alphabet ' ' char. 70 * 71 * Returns GSM_EXTENDED_ESCAPE if this character is in the extended table 72 * In this case, you must call charToGsmExtended() for the value that 73 * should follow GSM_EXTENDED_ESCAPE in the GSM alphabet string 74 */ 75 76 public static int 77 charToGsm(char c, boolean throwException) throws EncodeException { 78 int ret; 79 80 ret = charToGsm.get(c, -1); 81 82 if (ret == -1) { 83 ret = charToGsmExtended.get(c, -1); 84 85 if (ret == -1) { 86 if (throwException) { 87 throw new EncodeException(c); 88 } else { 89 return sGsmSpaceChar; 90 } 91 } else { 92 return GSM_EXTENDED_ESCAPE; 93 } 94 } 95 96 return ret; 97 98 } 99 100 101 /** 102 * char to extended GSM alphabet char 103 * 104 * Extended chars should be escaped with GSM_EXTENDED_ESCAPE 105 * 106 * Returns ' ' in GSM alphabet if there's no possible match 107 * 108 */ 109 public static int 110 charToGsmExtended(char c) { 111 int ret; 112 113 ret = charToGsmExtended.get(c, -1); 114 115 if (ret == -1) { 116 return sGsmSpaceChar; 117 } 118 119 return ret; 120 } 121 122 /** 123 * Converts a character in the GSM alphabet into a char 124 * 125 * if GSM_EXTENDED_ESCAPE is passed, 0xffff is returned. 
In this case, 126 * the following character in the stream should be decoded with 127 * gsmExtendedToChar() 128 * 129 * If an unmappable value is passed (one greater than 127), ' ' is returned 130 */ 131 132 public static char 133 gsmToChar(int gsmChar) { 134 return (char)gsmToChar.get(gsmChar, ' '); 135 } 136 137 /** 138 * Converts a character in the extended GSM alphabet into a char 139 * 140 * if GSM_EXTENDED_ESCAPE is passed, ' ' is returned since no second 141 * extension page has yet been defined (see Note 1 in table 6.2.1.1 of 142 * TS 23.038 v7.00) 143 * 144 * If an unmappable value is passed , ' ' is returned 145 */ 146 147 public static char 148 gsmExtendedToChar(int gsmChar) { 149 int ret; 150 151 ret = gsmExtendedToChar.get(gsmChar, -1); 152 153 if (ret == -1) { 154 return ' '; 155 } 156 157 return (char)ret; 158 } 159 160 /** 161 * Converts a String into a byte array containing the 7-bit packed 162 * GSM Alphabet representation of the string. If a header is provided, 163 * this is included in the returned byte array and padded to a septet 164 * boundary. 165 * 166 * Unencodable chars are encoded as spaces 167 * 168 * Byte 0 in the returned byte array is the count of septets used, 169 * including the header and header padding. The returned byte array is 170 * the minimum size required to store the packed septets. The returned 171 * array cannot contain more than 255 septets. 172 * 173 * @param data The text string to encode. 174 * @param header Optional header (includeing length byte) that precedes 175 * the encoded data, padded to septet boundary. 176 * @return Byte array containing header and encoded data. 
177 */ 178 public static byte[] stringToGsm7BitPackedWithHeader(String data, byte[] header) 179 throws EncodeException { 180 181 if (header == null || header.length == 0) { 182 return stringToGsm7BitPacked(data); 183 } 184 185 int headerBits = (header.length + 1) * 8; 186 int headerSeptets = (headerBits + 6) / 7; 187 188 byte[] ret = stringToGsm7BitPacked(data, headerSeptets, true); 189 190 // Paste in the header 191 ret[1] = (byte)header.length; 192 System.arraycopy(header, 0, ret, 2, header.length); 193 return ret; 194 } 195 196 /** 197 * Converts a String into a byte array containing 198 * the 7-bit packed GSM Alphabet representation of the string. 199 * 200 * Unencodable chars are encoded as spaces 201 * 202 * Byte 0 in the returned byte array is the count of septets used 203 * The returned byte array is the minimum size required to store 204 * the packed septets. The returned array cannot contain more than 255 205 * septets. 206 * 207 * @param data the data string to endcode 208 * @throws EncodeException if String is too large to encode 209 */ 210 public static byte[] stringToGsm7BitPacked(String data) 211 throws EncodeException { 212 return stringToGsm7BitPacked(data, 0, true); 213 } 214 215 /** 216 * Converts a String into a byte array containing 217 * the 7-bit packed GSM Alphabet representation of the string. 218 * 219 * Byte 0 in the returned byte array is the count of septets used 220 * The returned byte array is the minimum size required to store 221 * the packed septets. The returned array cannot contain more than 255 222 * septets. 223 * 224 * @param data the text to convert to septets 225 * @param startingSeptetOffset the number of padding septets to put before 226 * the character data at the begining of the array 227 * @param throwException If true, throws EncodeException on invalid char. 228 * If false, replaces unencodable char with GSM alphabet space char. 
229 * 230 * @throws EncodeException if String is too large to encode 231 */ 232 public static byte[] stringToGsm7BitPacked(String data, int startingSeptetOffset, 233 boolean throwException) throws EncodeException { 234 int dataLen = data.length(); 235 int septetCount = countGsmSeptets(data, throwException) + startingSeptetOffset; 236 if (septetCount > 255) { 237 throw new EncodeException("Payload cannot exceed 255 septets"); 238 } 239 int byteCount = ((septetCount * 7) + 7) / 8; 240 byte[] ret = new byte[byteCount + 1]; // Include space for one byte length prefix. 241 for (int i = 0, septets = startingSeptetOffset, bitOffset = startingSeptetOffset * 7; 242 i < dataLen && septets < septetCount; 243 i++, bitOffset += 7) { 244 char c = data.charAt(i); 245 int v = GsmAlphabet.charToGsm(c, throwException); 246 if (v == GSM_EXTENDED_ESCAPE) { 247 v = GsmAlphabet.charToGsmExtended(c); // Lookup the extended char. 248 packSmsChar(ret, bitOffset, GSM_EXTENDED_ESCAPE); 249 bitOffset += 7; 250 septets++; 251 } 252 packSmsChar(ret, bitOffset, v); 253 septets++; 254 } 255 ret[0] = (byte) (septetCount); // Validated by check above. 256 return ret; 257 } 258 259 /** 260 * Pack a 7-bit char into its appropirate place in a byte array 261 * 262 * @param bitOffset the bit offset that the septet should be packed at 263 * (septet index * 7) 264 */ 265 private static void 266 packSmsChar(byte[] packedChars, int bitOffset, int value) { 267 int byteOffset = bitOffset / 8; 268 int shift = bitOffset % 8; 269 270 packedChars[++byteOffset] |= value << shift; 271 272 if (shift > 1) { 273 packedChars[++byteOffset] = (byte)(value >> (8 - shift)); 274 } 275 } 276 277 /** 278 * Convert a GSM alphabet 7 bit packed string (SMS string) into a 279 * {@link java.lang.String}. 
280 * 281 * See TS 23.038 6.1.2.1 for SMS Character Packing 282 * 283 * @param pdu the raw data from the pdu 284 * @param offset the byte offset of 285 * @param lengthSeptets string length in septets, not bytes 286 * @return String representation or null on decoding exception 287 */ 288 public static String gsm7BitPackedToString(byte[] pdu, int offset, 289 int lengthSeptets) { 290 return gsm7BitPackedToString(pdu, offset, lengthSeptets, 0); 291 } 292 293 /** 294 * Convert a GSM alphabet 7 bit packed string (SMS string) into a 295 * {@link java.lang.String}. 296 * 297 * See TS 23.038 6.1.2.1 for SMS Character Packing 298 * 299 * @param pdu the raw data from the pdu 300 * @param offset the byte offset of 301 * @param lengthSeptets string length in septets, not bytes 302 * @param numPaddingBits the number of padding bits before the start of the 303 * string in the first byte 304 * @return String representation or null on decoding exception 305 */ 306 public static String gsm7BitPackedToString(byte[] pdu, int offset, 307 int lengthSeptets, int numPaddingBits) { 308 StringBuilder ret = new StringBuilder(lengthSeptets); 309 boolean prevCharWasEscape; 310 311 try { 312 prevCharWasEscape = false; 313 314 for (int i = 0 ; i < lengthSeptets ; i++) { 315 int bitOffset = (7 * i) + numPaddingBits; 316 317 int byteOffset = bitOffset / 8; 318 int shift = bitOffset % 8; 319 int gsmVal; 320 321 gsmVal = (0x7f & (pdu[offset + byteOffset] >> shift)); 322 323 // if it crosses a byte boundry 324 if (shift > 1) { 325 // set msb bits to 0 326 gsmVal &= 0x7f >> (shift - 1); 327 328 gsmVal |= 0x7f & (pdu[offset + byteOffset + 1] << (8 - shift)); 329 } 330 331 if (prevCharWasEscape) { 332 ret.append(GsmAlphabet.gsmExtendedToChar(gsmVal)); 333 prevCharWasEscape = false; 334 } else if (gsmVal == GSM_EXTENDED_ESCAPE) { 335 prevCharWasEscape = true; 336 } else { 337 ret.append(GsmAlphabet.gsmToChar(gsmVal)); 338 } 339 } 340 } catch (RuntimeException ex) { 341 Log.e(LOG_TAG, "Error GSM 7 bit 
packed: ", ex); 342 return null; 343 } 344 345 return ret.toString(); 346 } 347 348 349 /** 350 * Convert a GSM alphabet string that's stored in 8-bit unpacked 351 * format (as it often appears in SIM records) into a String 352 * 353 * Field may be padded with trailing 0xff's. The decode stops 354 * at the first 0xff encountered. 355 */ 356 public static String 357 gsm8BitUnpackedToString(byte[] data, int offset, int length) { 358 boolean prevWasEscape; 359 StringBuilder ret = new StringBuilder(length); 360 361 prevWasEscape = false; 362 for (int i = offset ; i < offset + length ; i++) { 363 // Never underestimate the pain that can be caused 364 // by signed bytes 365 int c = data[i] & 0xff; 366 367 if (c == 0xff) { 368 break; 369 } else if (c == GSM_EXTENDED_ESCAPE) { 370 if (prevWasEscape) { 371 // Two escape chars in a row 372 // We treat this as a space 373 // See Note 1 in table 6.2.1.1 of TS 23.038 v7.00 374 ret.append(' '); 375 prevWasEscape = false; 376 } else { 377 prevWasEscape = true; 378 } 379 } else { 380 if (prevWasEscape) { 381 ret.append((char)gsmExtendedToChar.get(c, ' ')); 382 } else { 383 ret.append((char)gsmToChar.get(c, ' ')); 384 } 385 prevWasEscape = false; 386 } 387 } 388 389 return ret.toString(); 390 } 391 392 /** 393 * Convert a string into an 8-bit unpacked GSM alphabet byte 394 * array 395 */ 396 public static byte[] 397 stringToGsm8BitPacked(String s) { 398 byte[] ret; 399 400 int septets = 0; 401 402 septets = countGsmSeptets(s); 403 404 // Enough for all the septets and the length byte prefix 405 ret = new byte[septets]; 406 407 stringToGsm8BitUnpackedField(s, ret, 0, ret.length); 408 409 return ret; 410 } 411 412 413 /** 414 * Write a String into a GSM 8-bit unpacked field of 415 * @param length size at @param offset in @param dest 416 * 417 * Field is padded with 0xff's, string is truncated if necessary 418 */ 419 420 public static void 421 stringToGsm8BitUnpackedField(String s, byte dest[], int offset, int length) { 422 int 
outByteIndex = offset; 423 424 // Septets are stored in byte-aligned octets 425 for (int i = 0, sz = s.length() 426 ; i < sz && (outByteIndex - offset) < length 427 ; i++ 428 ) { 429 char c = s.charAt(i); 430 431 int v = GsmAlphabet.charToGsm(c); 432 433 if (v == GSM_EXTENDED_ESCAPE) { 434 // make sure we can fit an escaped char 435 if (! (outByteIndex + 1 - offset < length)) { 436 break; 437 } 438 439 dest[outByteIndex++] = GSM_EXTENDED_ESCAPE; 440 441 v = GsmAlphabet.charToGsmExtended(c); 442 } 443 444 dest[outByteIndex++] = (byte)v; 445 } 446 447 // pad with 0xff's 448 while((outByteIndex - offset) < length) { 449 dest[outByteIndex++] = (byte)0xff; 450 } 451 } 452 453 /** 454 * Returns the count of 7-bit GSM alphabet characters 455 * needed to represent this character. Counts unencodable char as 1 septet. 456 */ 457 public static int 458 countGsmSeptets(char c) { 459 try { 460 return countGsmSeptets(c, false); 461 } catch (EncodeException ex) { 462 // This should never happen. 463 return 0; 464 } 465 } 466 467 /** 468 * Returns the count of 7-bit GSM alphabet characters 469 * needed to represent this character 470 * @param throwsException If true, throws EncodeException if unencodable 471 * char. Otherwise, counts invalid char as 1 septet 472 */ 473 public static int 474 countGsmSeptets(char c, boolean throwsException) throws EncodeException { 475 if (charToGsm.get(c, -1) != -1) { 476 return 1; 477 } 478 479 if (charToGsmExtended.get(c, -1) != -1) { 480 return 2; 481 } 482 483 if (throwsException) { 484 throw new EncodeException(c); 485 } else { 486 // count as a space char 487 return 1; 488 } 489 } 490 491 /** 492 * Returns the count of 7-bit GSM alphabet characters 493 * needed to represent this string. Counts unencodable char as 1 septet. 
494 */ 495 public static int 496 countGsmSeptets(CharSequence s) { 497 try { 498 return countGsmSeptets(s, false); 499 } catch (EncodeException ex) { 500 // this should never happen 501 return 0; 502 } 503 } 504 505 /** 506 * Returns the count of 7-bit GSM alphabet characters 507 * needed to represent this string. 508 * @param throwsException If true, throws EncodeException if unencodable 509 * char. Otherwise, counts invalid char as 1 septet 510 */ 511 public static int 512 countGsmSeptets(CharSequence s, boolean throwsException) throws EncodeException { 513 int charIndex = 0; 514 int sz = s.length(); 515 int count = 0; 516 517 while (charIndex < sz) { 518 count += countGsmSeptets(s.charAt(charIndex), throwsException); 519 charIndex++; 520 } 521 522 return count; 523 } 524 525 /** 526 * Returns the index into <code>s</code> of the first character 527 * after <code>limit</code> septets have been reached, starting at 528 * index <code>start</code>. This is used when dividing messages 529 * into units within the SMS message size limit. 530 * 531 * @param s source string 532 * @param start index of where to start counting septets 533 * @param limit maximum septets to include, 534 * e.g. 
<code>MAX_USER_DATA_SEPTETS</code> 535 * @return index of first character that won't fit, or the length 536 * of the entire string if everything fits 537 */ 538 public static int 539 findGsmSeptetLimitIndex(String s, int start, int limit) { 540 int accumulator = 0; 541 int size = s.length(); 542 543 for (int i = start; i < size; i++) { 544 accumulator += countGsmSeptets(s.charAt(i)); 545 if (accumulator > limit) { 546 return i; 547 } 548 } 549 return size; 550 } 551 552 // Set in the static initializer 553 private static int sGsmSpaceChar; 554 555 private static final SparseIntArray charToGsm = new SparseIntArray(); 556 private static final SparseIntArray gsmToChar = new SparseIntArray(); 557 private static final SparseIntArray charToGsmExtended = new SparseIntArray(); 558 private static final SparseIntArray gsmExtendedToChar = new SparseIntArray(); 559 560 static { 561 int i = 0; 562 563 charToGsm.put('@', i++); 564 charToGsm.put('\u00a3', i++); 565 charToGsm.put('$', i++); 566 charToGsm.put('\u00a5', i++); 567 charToGsm.put('\u00e8', i++); 568 charToGsm.put('\u00e9', i++); 569 charToGsm.put('\u00f9', i++); 570 charToGsm.put('\u00ec', i++); 571 charToGsm.put('\u00f2', i++); 572 charToGsm.put('\u00c7', i++); 573 charToGsm.put('\n', i++); 574 charToGsm.put('\u00d8', i++); 575 charToGsm.put('\u00f8', i++); 576 charToGsm.put('\r', i++); 577 charToGsm.put('\u00c5', i++); 578 charToGsm.put('\u00e5', i++); 579 580 charToGsm.put('\u0394', i++); 581 charToGsm.put('_', i++); 582 charToGsm.put('\u03a6', i++); 583 charToGsm.put('\u0393', i++); 584 charToGsm.put('\u039b', i++); 585 charToGsm.put('\u03a9', i++); 586 charToGsm.put('\u03a0', i++); 587 charToGsm.put('\u03a8', i++); 588 charToGsm.put('\u03a3', i++); 589 charToGsm.put('\u0398', i++); 590 charToGsm.put('\u039e', i++); 591 charToGsm.put('\uffff', i++); 592 charToGsm.put('\u00c6', i++); 593 charToGsm.put('\u00e6', i++); 594 charToGsm.put('\u00df', i++); 595 charToGsm.put('\u00c9', i++); 596 597 charToGsm.put(' ', i++); 
598 charToGsm.put('!', i++); 599 charToGsm.put('"', i++); 600 charToGsm.put('#', i++); 601 charToGsm.put('\u00a4', i++); 602 charToGsm.put('%', i++); 603 charToGsm.put('&', i++); 604 charToGsm.put('\'', i++); 605 charToGsm.put('(', i++); 606 charToGsm.put(')', i++); 607 charToGsm.put('*', i++); 608 charToGsm.put('+', i++); 609 charToGsm.put(',', i++); 610 charToGsm.put('-', i++); 611 charToGsm.put('.', i++); 612 charToGsm.put('/', i++); 613 614 charToGsm.put('0', i++); 615 charToGsm.put('1', i++); 616 charToGsm.put('2', i++); 617 charToGsm.put('3', i++); 618 charToGsm.put('4', i++); 619 charToGsm.put('5', i++); 620 charToGsm.put('6', i++); 621 charToGsm.put('7', i++); 622 charToGsm.put('8', i++); 623 charToGsm.put('9', i++); 624 charToGsm.put(':', i++); 625 charToGsm.put(';', i++); 626 charToGsm.put('<', i++); 627 charToGsm.put('=', i++); 628 charToGsm.put('>', i++); 629 charToGsm.put('?', i++); 630 631 charToGsm.put('\u00a1', i++); 632 charToGsm.put('A', i++); 633 charToGsm.put('B', i++); 634 charToGsm.put('C', i++); 635 charToGsm.put('D', i++); 636 charToGsm.put('E', i++); 637 charToGsm.put('F', i++); 638 charToGsm.put('G', i++); 639 charToGsm.put('H', i++); 640 charToGsm.put('I', i++); 641 charToGsm.put('J', i++); 642 charToGsm.put('K', i++); 643 charToGsm.put('L', i++); 644 charToGsm.put('M', i++); 645 charToGsm.put('N', i++); 646 charToGsm.put('O', i++); 647 648 charToGsm.put('P', i++); 649 charToGsm.put('Q', i++); 650 charToGsm.put('R', i++); 651 charToGsm.put('S', i++); 652 charToGsm.put('T', i++); 653 charToGsm.put('U', i++); 654 charToGsm.put('V', i++); 655 charToGsm.put('W', i++); 656 charToGsm.put('X', i++); 657 charToGsm.put('Y', i++); 658 charToGsm.put('Z', i++); 659 charToGsm.put('\u00c4', i++); 660 charToGsm.put('\u00d6', i++); 661 charToGsm.put('\u00d1', i++); 662 charToGsm.put('\u00dc', i++); 663 charToGsm.put('\u00a7', i++); 664 665 charToGsm.put('\u00bf', i++); 666 charToGsm.put('a', i++); 667 charToGsm.put('b', i++); 668 charToGsm.put('c', i++); 
669 charToGsm.put('d', i++); 670 charToGsm.put('e', i++); 671 charToGsm.put('f', i++); 672 charToGsm.put('g', i++); 673 charToGsm.put('h', i++); 674 charToGsm.put('i', i++); 675 charToGsm.put('j', i++); 676 charToGsm.put('k', i++); 677 charToGsm.put('l', i++); 678 charToGsm.put('m', i++); 679 charToGsm.put('n', i++); 680 charToGsm.put('o', i++); 681 682 charToGsm.put('p', i++); 683 charToGsm.put('q', i++); 684 charToGsm.put('r', i++); 685 charToGsm.put('s', i++); 686 charToGsm.put('t', i++); 687 charToGsm.put('u', i++); 688 charToGsm.put('v', i++); 689 charToGsm.put('w', i++); 690 charToGsm.put('x', i++); 691 charToGsm.put('y', i++); 692 charToGsm.put('z', i++); 693 charToGsm.put('\u00e4', i++); 694 charToGsm.put('\u00f6', i++); 695 charToGsm.put('\u00f1', i++); 696 charToGsm.put('\u00fc', i++); 697 charToGsm.put('\u00e0', i++); 698 699 700 charToGsmExtended.put('\f', 10); 701 charToGsmExtended.put('^', 20); 702 charToGsmExtended.put('{', 40); 703 charToGsmExtended.put('}', 41); 704 charToGsmExtended.put('\\', 47); 705 charToGsmExtended.put('[', 60); 706 charToGsmExtended.put('~', 61); 707 charToGsmExtended.put(']', 62); 708 charToGsmExtended.put('|', 64); 709 charToGsmExtended.put('\u20ac', 101); 710 711 int size = charToGsm.size(); 712 for (int j=0; j<size; j++) { 713 gsmToChar.put(charToGsm.valueAt(j), charToGsm.keyAt(j)); 714 } 715 716 size = charToGsmExtended.size(); 717 for (int j=0; j<size; j++) { 718 gsmExtendedToChar.put(charToGsmExtended.valueAt(j), charToGsmExtended.keyAt(j)); 719 } 720 721 722 sGsmSpaceChar = charToGsm.get(' '); 723 } 724 725 726} 727