GsmAlphabet.java revision 17f616823a562ceb3a008f91e05d43bc56d37cae
1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.internal.telephony;
18
19import android.telephony.SmsMessage;
20import android.util.SparseIntArray;
21
22import android.util.Log;
23
24/**
25 * This class implements the character set mapping between
26 * the GSM SMS 7-bit alphabet specified in TS 23.038 6.2.1
27 * and UTF-16
28 *
29 * {@hide}
30 */
31public class GsmAlphabet {
32    static final String LOG_TAG = "GSM";
33
34
35
36    //***** Constants
37
38    /**
39     * This escapes extended characters, and when present indicates that the
40     * following character should
41     * be looked up in the "extended" table
42     *
43     * gsmToChar(GSM_EXTENDED_ESCAPE) returns 0xffff
44     */
45
46    public static final byte GSM_EXTENDED_ESCAPE = 0x1B;
47
48
49    /**
50     * char to GSM alphabet char
51     * Returns ' ' in GSM alphabet if there's no possible match
52     * Returns GSM_EXTENDED_ESCAPE if this character is in the extended table
53     * In this case, you must call charToGsmExtended() for the value that
54     * should follow GSM_EXTENDED_ESCAPE in the GSM alphabet string
55     */
56    public static int
57    charToGsm(char c) {
58        try {
59            return charToGsm(c, false);
60        } catch (EncodeException ex) {
61            // this should never happen
62            return sGsmSpaceChar;
63        }
64    }
65
66    /**
67     * char to GSM alphabet char
68     * @param throwException If true, throws EncodeException on invalid char.
69     *   If false, returns GSM alphabet ' ' char.
70     *
71     * Returns GSM_EXTENDED_ESCAPE if this character is in the extended table
72     * In this case, you must call charToGsmExtended() for the value that
73     * should follow GSM_EXTENDED_ESCAPE in the GSM alphabet string
74     */
75
76    public static int
77    charToGsm(char c, boolean throwException) throws EncodeException {
78        int ret;
79
80        ret = charToGsm.get(c, -1);
81
82        if (ret == -1) {
83            ret = charToGsmExtended.get(c, -1);
84
85            if (ret == -1) {
86                if (throwException) {
87                    throw new EncodeException(c);
88                } else {
89                    return sGsmSpaceChar;
90                }
91            } else {
92                return GSM_EXTENDED_ESCAPE;
93            }
94        }
95
96        return ret;
97
98    }
99
100
101    /**
102     * char to extended GSM alphabet char
103     *
104     * Extended chars should be escaped with GSM_EXTENDED_ESCAPE
105     *
106     * Returns ' ' in GSM alphabet if there's no possible match
107     *
108     */
109    public static int
110    charToGsmExtended(char c) {
111        int ret;
112
113        ret = charToGsmExtended.get(c, -1);
114
115        if (ret == -1) {
116            return sGsmSpaceChar;
117        }
118
119        return ret;
120    }
121
122    /**
123     * Converts a character in the GSM alphabet into a char
124     *
125     * if GSM_EXTENDED_ESCAPE is passed, 0xffff is returned. In this case,
126     * the following character in the stream should be decoded with
127     * gsmExtendedToChar()
128     *
129     * If an unmappable value is passed (one greater than 127), ' ' is returned
130     */
131
132    public static char
133    gsmToChar(int gsmChar) {
134        return (char)gsmToChar.get(gsmChar, ' ');
135    }
136
137    /**
138     * Converts a character in the extended GSM alphabet into a char
139     *
140     * if GSM_EXTENDED_ESCAPE is passed, ' ' is returned since no second
141     * extension page has yet been defined (see Note 1 in table 6.2.1.1 of
142     * TS 23.038 v7.00)
143     *
144     * If an unmappable value is passed , ' ' is returned
145     */
146
147    public static char
148    gsmExtendedToChar(int gsmChar) {
149        int ret;
150
151        ret = gsmExtendedToChar.get(gsmChar, -1);
152
153        if (ret == -1) {
154            return ' ';
155        }
156
157        return (char)ret;
158    }
159
160    /**
161     * Converts a String into a byte array containing the 7-bit packed
162     * GSM Alphabet representation of the string. If a header is provided,
163     * this is included in the returned byte array and padded to a septet
164     * boundary.
165     *
166     * Unencodable chars are encoded as spaces
167     *
168     * Byte 0 in the returned byte array is the count of septets used,
169     * including the header and header padding. The returned byte array is
170     * the minimum size required to store the packed septets. The returned
171     * array cannot contain more than 255 septets.
172     *
173     * @param data The text string to encode.
174     * @param header Optional header (includeing length byte) that precedes
175     * the encoded data, padded to septet boundary.
176     * @return Byte array containing header and encoded data.
177     */
178    public static byte[] stringToGsm7BitPackedWithHeader(String data, byte[] header)
179            throws EncodeException {
180
181        if (header == null || header.length == 0) {
182            return stringToGsm7BitPacked(data);
183        }
184
185        int headerBits = (header.length + 1) * 8;
186        int headerSeptets = (headerBits + 6) / 7;
187
188        byte[] ret = stringToGsm7BitPacked(data, headerSeptets, true);
189
190        // Paste in the header
191        ret[1] = (byte)header.length;
192        System.arraycopy(header, 0, ret, 2, header.length);
193        return ret;
194    }
195
196    /**
197     * Converts a String into a byte array containing
198     * the 7-bit packed GSM Alphabet representation of the string.
199     *
200     * Unencodable chars are encoded as spaces
201     *
202     * Byte 0 in the returned byte array is the count of septets used
203     * The returned byte array is the minimum size required to store
204     * the packed septets. The returned array cannot contain more than 255
205     * septets.
206     *
207     * @param data the data string to endcode
208     * @throws EncodeException if String is too large to encode
209     */
210    public static byte[] stringToGsm7BitPacked(String data)
211            throws EncodeException {
212        return stringToGsm7BitPacked(data, 0, true);
213    }
214
215    /**
216     * Converts a String into a byte array containing
217     * the 7-bit packed GSM Alphabet representation of the string.
218     *
219     * Byte 0 in the returned byte array is the count of septets used
220     * The returned byte array is the minimum size required to store
221     * the packed septets. The returned array cannot contain more than 255
222     * septets.
223     *
224     * @param data the text to convert to septets
225     * @param startingSeptetOffset the number of padding septets to put before
226     *  the character data at the begining of the array
227     * @param throwException If true, throws EncodeException on invalid char.
228     *   If false, replaces unencodable char with GSM alphabet space char.
229     *
230     * @throws EncodeException if String is too large to encode
231     */
232    public static byte[] stringToGsm7BitPacked(String data, int startingSeptetOffset,
233            boolean throwException) throws EncodeException {
234        int dataLen = data.length();
235        int septetCount = countGsmSeptets(data, throwException) + startingSeptetOffset;
236        if (septetCount > 255) {
237            throw new EncodeException("Payload cannot exceed 255 septets");
238        }
239        int byteCount = ((septetCount * 7) + 7) / 8;
240        byte[] ret = new byte[byteCount + 1];  // Include space for one byte length prefix.
241        for (int i = 0, septets = startingSeptetOffset, bitOffset = startingSeptetOffset * 7;
242                 i < dataLen && septets < septetCount;
243                 i++, bitOffset += 7) {
244            char c = data.charAt(i);
245            int v = GsmAlphabet.charToGsm(c, throwException);
246            if (v == GSM_EXTENDED_ESCAPE) {
247                v = GsmAlphabet.charToGsmExtended(c);  // Lookup the extended char.
248                packSmsChar(ret, bitOffset, GSM_EXTENDED_ESCAPE);
249                bitOffset += 7;
250                septets++;
251            }
252            packSmsChar(ret, bitOffset, v);
253            septets++;
254        }
255        ret[0] = (byte) (septetCount);  // Validated by check above.
256        return ret;
257    }
258
259    /**
260     * Pack a 7-bit char into its appropirate place in a byte array
261     *
262     * @param bitOffset the bit offset that the septet should be packed at
263     *                  (septet index * 7)
264     */
265    private static void
266    packSmsChar(byte[] packedChars, int bitOffset, int value) {
267        int byteOffset = bitOffset / 8;
268        int shift = bitOffset % 8;
269
270        packedChars[++byteOffset] |= value << shift;
271
272        if (shift > 1) {
273            packedChars[++byteOffset] = (byte)(value >> (8 - shift));
274        }
275    }
276
277    /**
278     * Convert a GSM alphabet 7 bit packed string (SMS string) into a
279     * {@link java.lang.String}.
280     *
281     * See TS 23.038 6.1.2.1 for SMS Character Packing
282     *
283     * @param pdu the raw data from the pdu
284     * @param offset the byte offset of
285     * @param lengthSeptets string length in septets, not bytes
286     * @return String representation or null on decoding exception
287     */
288    public static String gsm7BitPackedToString(byte[] pdu, int offset,
289            int lengthSeptets) {
290        return gsm7BitPackedToString(pdu, offset, lengthSeptets, 0);
291    }
292
293    /**
294     * Convert a GSM alphabet 7 bit packed string (SMS string) into a
295     * {@link java.lang.String}.
296     *
297     * See TS 23.038 6.1.2.1 for SMS Character Packing
298     *
299     * @param pdu the raw data from the pdu
300     * @param offset the byte offset of
301     * @param lengthSeptets string length in septets, not bytes
302     * @param numPaddingBits the number of padding bits before the start of the
303     *  string in the first byte
304     * @return String representation or null on decoding exception
305     */
306    public static String gsm7BitPackedToString(byte[] pdu, int offset,
307            int lengthSeptets, int numPaddingBits) {
308        StringBuilder ret = new StringBuilder(lengthSeptets);
309        boolean prevCharWasEscape;
310
311        try {
312            prevCharWasEscape = false;
313
314            for (int i = 0 ; i < lengthSeptets ; i++) {
315                int bitOffset = (7 * i) + numPaddingBits;
316
317                int byteOffset = bitOffset / 8;
318                int shift = bitOffset % 8;
319                int gsmVal;
320
321                gsmVal = (0x7f & (pdu[offset + byteOffset] >> shift));
322
323                // if it crosses a byte boundry
324                if (shift > 1) {
325                    // set msb bits to 0
326                    gsmVal &= 0x7f >> (shift - 1);
327
328                    gsmVal |= 0x7f & (pdu[offset + byteOffset + 1] << (8 - shift));
329                }
330
331                if (prevCharWasEscape) {
332                    ret.append(GsmAlphabet.gsmExtendedToChar(gsmVal));
333                    prevCharWasEscape = false;
334                } else if (gsmVal == GSM_EXTENDED_ESCAPE) {
335                    prevCharWasEscape = true;
336                } else {
337                    ret.append(GsmAlphabet.gsmToChar(gsmVal));
338                }
339            }
340        } catch (RuntimeException ex) {
341            Log.e(LOG_TAG, "Error GSM 7 bit packed: ", ex);
342            return null;
343        }
344
345        return ret.toString();
346    }
347
348
349    /**
350     * Convert a GSM alphabet string that's stored in 8-bit unpacked
351     * format (as it often appears in SIM records) into a String
352     *
353     * Field may be padded with trailing 0xff's. The decode stops
354     * at the first 0xff encountered.
355     */
356    public static String
357    gsm8BitUnpackedToString(byte[] data, int offset, int length) {
358        boolean prevWasEscape;
359        StringBuilder ret = new StringBuilder(length);
360
361        prevWasEscape = false;
362        for (int i = offset ; i < offset + length ; i++) {
363            // Never underestimate the pain that can be caused
364            // by signed bytes
365            int c = data[i] & 0xff;
366
367            if (c == 0xff) {
368                break;
369            } else if (c == GSM_EXTENDED_ESCAPE) {
370                if (prevWasEscape) {
371                    // Two escape chars in a row
372                    // We treat this as a space
373                    // See Note 1 in table 6.2.1.1 of TS 23.038 v7.00
374                    ret.append(' ');
375                    prevWasEscape = false;
376                } else {
377                    prevWasEscape = true;
378                }
379            } else {
380                if (prevWasEscape) {
381                    ret.append((char)gsmExtendedToChar.get(c, ' '));
382                } else {
383                    ret.append((char)gsmToChar.get(c, ' '));
384                }
385                prevWasEscape = false;
386            }
387        }
388
389        return ret.toString();
390    }
391
392    /**
393     * Convert a string into an 8-bit unpacked GSM alphabet byte
394     * array
395     */
396    public static byte[]
397    stringToGsm8BitPacked(String s) {
398        byte[] ret;
399
400        int septets = 0;
401
402        septets = countGsmSeptets(s);
403
404        // Enough for all the septets and the length byte prefix
405        ret = new byte[septets];
406
407        stringToGsm8BitUnpackedField(s, ret, 0, ret.length);
408
409        return ret;
410    }
411
412
413    /**
414     * Write a String into a GSM 8-bit unpacked field of
415     * @param length size at @param offset in @param dest
416     *
417     * Field is padded with 0xff's, string is truncated if necessary
418     */
419
420    public static void
421    stringToGsm8BitUnpackedField(String s, byte dest[], int offset, int length) {
422        int outByteIndex = offset;
423
424        // Septets are stored in byte-aligned octets
425        for (int i = 0, sz = s.length()
426                ; i < sz && (outByteIndex - offset) < length
427                ; i++
428        ) {
429            char c = s.charAt(i);
430
431            int v = GsmAlphabet.charToGsm(c);
432
433            if (v == GSM_EXTENDED_ESCAPE) {
434                // make sure we can fit an escaped char
435                if (! (outByteIndex + 1 - offset < length)) {
436                    break;
437                }
438
439                dest[outByteIndex++] = GSM_EXTENDED_ESCAPE;
440
441                v = GsmAlphabet.charToGsmExtended(c);
442            }
443
444            dest[outByteIndex++] = (byte)v;
445        }
446
447        // pad with 0xff's
448        while((outByteIndex - offset) < length) {
449            dest[outByteIndex++] = (byte)0xff;
450        }
451    }
452
453    /**
454     * Returns the count of 7-bit GSM alphabet characters
455     * needed to represent this character. Counts unencodable char as 1 septet.
456     */
457    public static int
458    countGsmSeptets(char c) {
459        try {
460            return countGsmSeptets(c, false);
461        } catch (EncodeException ex) {
462            // This should never happen.
463            return 0;
464        }
465    }
466
467    /**
468     * Returns the count of 7-bit GSM alphabet characters
469     * needed to represent this character
470     * @param throwsException If true, throws EncodeException if unencodable
471     * char. Otherwise, counts invalid char as 1 septet
472     */
473    public static int
474    countGsmSeptets(char c, boolean throwsException) throws EncodeException {
475        if (charToGsm.get(c, -1) != -1) {
476            return 1;
477        }
478
479        if (charToGsmExtended.get(c, -1) != -1) {
480            return 2;
481        }
482
483        if (throwsException) {
484            throw new EncodeException(c);
485        } else {
486            // count as a space char
487            return 1;
488        }
489    }
490
491    /**
492     * Returns the count of 7-bit GSM alphabet characters
493     * needed to represent this string. Counts unencodable char as 1 septet.
494     */
495    public static int
496    countGsmSeptets(CharSequence s) {
497        try {
498            return countGsmSeptets(s, false);
499        } catch (EncodeException ex) {
500            // this should never happen
501            return 0;
502        }
503    }
504
505    /**
506     * Returns the count of 7-bit GSM alphabet characters
507     * needed to represent this string.
508     * @param throwsException If true, throws EncodeException if unencodable
509     * char. Otherwise, counts invalid char as 1 septet
510     */
511    public static int
512    countGsmSeptets(CharSequence s, boolean throwsException) throws EncodeException {
513        int charIndex = 0;
514        int sz = s.length();
515        int count = 0;
516
517        while (charIndex < sz) {
518            count += countGsmSeptets(s.charAt(charIndex), throwsException);
519            charIndex++;
520        }
521
522        return count;
523    }
524
525    /**
526     * Returns the index into <code>s</code> of the first character
527     * after <code>limit</code> septets have been reached, starting at
528     * index <code>start</code>.  This is used when dividing messages
529     * into units within the SMS message size limit.
530     *
531     * @param s source string
532     * @param start index of where to start counting septets
533     * @param limit maximum septets to include,
534     *   e.g. <code>MAX_USER_DATA_SEPTETS</code>
535     * @return index of first character that won't fit, or the length
536     *   of the entire string if everything fits
537     */
538    public static int
539    findGsmSeptetLimitIndex(String s, int start, int limit) {
540        int accumulator = 0;
541        int size = s.length();
542
543        for (int i = start; i < size; i++) {
544            accumulator += countGsmSeptets(s.charAt(i));
545            if (accumulator > limit) {
546                return i;
547            }
548        }
549        return size;
550    }
551
552    // Set in the static initializer
553    private static int sGsmSpaceChar;
554
555    private static final SparseIntArray charToGsm = new SparseIntArray();
556    private static final SparseIntArray gsmToChar = new SparseIntArray();
557    private static final SparseIntArray charToGsmExtended = new SparseIntArray();
558    private static final SparseIntArray gsmExtendedToChar = new SparseIntArray();
559
560    static {
561        int i = 0;
562
563        charToGsm.put('@', i++);
564        charToGsm.put('\u00a3', i++);
565        charToGsm.put('$', i++);
566        charToGsm.put('\u00a5', i++);
567        charToGsm.put('\u00e8', i++);
568        charToGsm.put('\u00e9', i++);
569        charToGsm.put('\u00f9', i++);
570        charToGsm.put('\u00ec', i++);
571        charToGsm.put('\u00f2', i++);
572        charToGsm.put('\u00c7', i++);
573        charToGsm.put('\n', i++);
574        charToGsm.put('\u00d8', i++);
575        charToGsm.put('\u00f8', i++);
576        charToGsm.put('\r', i++);
577        charToGsm.put('\u00c5', i++);
578        charToGsm.put('\u00e5', i++);
579
580        charToGsm.put('\u0394', i++);
581        charToGsm.put('_', i++);
582        charToGsm.put('\u03a6', i++);
583        charToGsm.put('\u0393', i++);
584        charToGsm.put('\u039b', i++);
585        charToGsm.put('\u03a9', i++);
586        charToGsm.put('\u03a0', i++);
587        charToGsm.put('\u03a8', i++);
588        charToGsm.put('\u03a3', i++);
589        charToGsm.put('\u0398', i++);
590        charToGsm.put('\u039e', i++);
591        charToGsm.put('\uffff', i++);
592        charToGsm.put('\u00c6', i++);
593        charToGsm.put('\u00e6', i++);
594        charToGsm.put('\u00df', i++);
595        charToGsm.put('\u00c9', i++);
596
597        charToGsm.put(' ', i++);
598        charToGsm.put('!', i++);
599        charToGsm.put('"', i++);
600        charToGsm.put('#', i++);
601        charToGsm.put('\u00a4', i++);
602        charToGsm.put('%', i++);
603        charToGsm.put('&', i++);
604        charToGsm.put('\'', i++);
605        charToGsm.put('(', i++);
606        charToGsm.put(')', i++);
607        charToGsm.put('*', i++);
608        charToGsm.put('+', i++);
609        charToGsm.put(',', i++);
610        charToGsm.put('-', i++);
611        charToGsm.put('.', i++);
612        charToGsm.put('/', i++);
613
614        charToGsm.put('0', i++);
615        charToGsm.put('1', i++);
616        charToGsm.put('2', i++);
617        charToGsm.put('3', i++);
618        charToGsm.put('4', i++);
619        charToGsm.put('5', i++);
620        charToGsm.put('6', i++);
621        charToGsm.put('7', i++);
622        charToGsm.put('8', i++);
623        charToGsm.put('9', i++);
624        charToGsm.put(':', i++);
625        charToGsm.put(';', i++);
626        charToGsm.put('<', i++);
627        charToGsm.put('=', i++);
628        charToGsm.put('>', i++);
629        charToGsm.put('?', i++);
630
631        charToGsm.put('\u00a1', i++);
632        charToGsm.put('A', i++);
633        charToGsm.put('B', i++);
634        charToGsm.put('C', i++);
635        charToGsm.put('D', i++);
636        charToGsm.put('E', i++);
637        charToGsm.put('F', i++);
638        charToGsm.put('G', i++);
639        charToGsm.put('H', i++);
640        charToGsm.put('I', i++);
641        charToGsm.put('J', i++);
642        charToGsm.put('K', i++);
643        charToGsm.put('L', i++);
644        charToGsm.put('M', i++);
645        charToGsm.put('N', i++);
646        charToGsm.put('O', i++);
647
648        charToGsm.put('P', i++);
649        charToGsm.put('Q', i++);
650        charToGsm.put('R', i++);
651        charToGsm.put('S', i++);
652        charToGsm.put('T', i++);
653        charToGsm.put('U', i++);
654        charToGsm.put('V', i++);
655        charToGsm.put('W', i++);
656        charToGsm.put('X', i++);
657        charToGsm.put('Y', i++);
658        charToGsm.put('Z', i++);
659        charToGsm.put('\u00c4', i++);
660        charToGsm.put('\u00d6', i++);
661        charToGsm.put('\u00d1', i++);
662        charToGsm.put('\u00dc', i++);
663        charToGsm.put('\u00a7', i++);
664
665        charToGsm.put('\u00bf', i++);
666        charToGsm.put('a', i++);
667        charToGsm.put('b', i++);
668        charToGsm.put('c', i++);
669        charToGsm.put('d', i++);
670        charToGsm.put('e', i++);
671        charToGsm.put('f', i++);
672        charToGsm.put('g', i++);
673        charToGsm.put('h', i++);
674        charToGsm.put('i', i++);
675        charToGsm.put('j', i++);
676        charToGsm.put('k', i++);
677        charToGsm.put('l', i++);
678        charToGsm.put('m', i++);
679        charToGsm.put('n', i++);
680        charToGsm.put('o', i++);
681
682        charToGsm.put('p', i++);
683        charToGsm.put('q', i++);
684        charToGsm.put('r', i++);
685        charToGsm.put('s', i++);
686        charToGsm.put('t', i++);
687        charToGsm.put('u', i++);
688        charToGsm.put('v', i++);
689        charToGsm.put('w', i++);
690        charToGsm.put('x', i++);
691        charToGsm.put('y', i++);
692        charToGsm.put('z', i++);
693        charToGsm.put('\u00e4', i++);
694        charToGsm.put('\u00f6', i++);
695        charToGsm.put('\u00f1', i++);
696        charToGsm.put('\u00fc', i++);
697        charToGsm.put('\u00e0', i++);
698
699
700        charToGsmExtended.put('\f', 10);
701        charToGsmExtended.put('^', 20);
702        charToGsmExtended.put('{', 40);
703        charToGsmExtended.put('}', 41);
704        charToGsmExtended.put('\\', 47);
705        charToGsmExtended.put('[', 60);
706        charToGsmExtended.put('~', 61);
707        charToGsmExtended.put(']', 62);
708        charToGsmExtended.put('|', 64);
709        charToGsmExtended.put('\u20ac', 101);
710
711        int size = charToGsm.size();
712        for (int j=0; j<size; j++) {
713            gsmToChar.put(charToGsm.valueAt(j), charToGsm.keyAt(j));
714        }
715
716        size = charToGsmExtended.size();
717        for (int j=0; j<size; j++) {
718            gsmExtendedToChar.put(charToGsmExtended.valueAt(j), charToGsmExtended.keyAt(j));
719        }
720
721
722        sGsmSpaceChar = charToGsm.get(' ');
723    }
724
725
726}
727