gsm-03.38-2000.ucm revision c73f511526464f8e56c242df80552e9b0d94ae3d
1# Copyright (c) 2000 Unicode, Inc.  All Rights reserved.
2#
3# Name:             GSM 03.38 to Unicode
4# Unicode version:  3.0
5# Table version:    1.1
6# Date:             2000 May 30
7# Authors:          Ken Whistler
8#                   Kent Karlsson
9#                   Markus Kuhn
10#
11# Source:           http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
12# See there for the license and for a description of the charset.
13# Formatted into ICU .ucm format by Markus Scherer on 2006-nov-02.
14# Commented-out mappings are turned into fallbacks (|1); all others are turned
15# into round-trips (|0), except the reverse fallbacks (|3) described in a) below.
16# Multi-byte mappings are preserved as multi-single-byte character mappings,
17# using ICU's m:n conversion capability.
18#
19# The substitution character is not documented in the Unicode file.
20# \x3F (U+003F) is chosen here because \x1A is a graphic character (U+039E).
21#
22# Other deviations from the Unicode file:
23# a)
24# The GSM standard specifies that one or two ESC bytes (\x1B), if not followed
25# by a recognized final byte, be mapped to spaces (that is, reverse fallbacks
26# to U+0020).
27# The Unicode file round-trips a single \x1B to U+00A0 (NBSP) and has no mapping
28# for \x1B\x1B.
29# (Reverse fallbacks to U+00A0 would result in Unicode text that cannot be
30# converted back to GSM 03.38. A roundtrip for U+00A0 adds a character that is
31# not mappable in the standard.)
32#
33# See the ietf-charsets list email "Re: GSM 03.38 substitution character?"
34# at http://mail.apps.ietf.org/ietf/charsets/msg01696.html
35#
36# b)
37# The GSM standard maps U+00C7 capital C-cedilla to \x09 but the Unicode file
38# contains and documents a "fix" to map U+00E7 small c-cedilla instead, based on
39# an interpretation of the intent of the standard. Prevailing implementations
40# in mobile phones follow the standard.
41#
42# This file follows the GSM standard.
43#
44# See the GSM standard at
45# http://www.3gpp.org/ftp/Specs/archive/03_series/03.38/0338-720.zip
46#
47# For problems with the table format please submit a bug
48# at http://www.icu-project.org/ .
49# For issues with the mappings please contact Unicode
50# at http://www.unicode.org/reporting.html
51
52<code_set_name>     "gsm-03.38-2000"
53<char_name_mask>    "AXXXX"
54<mb_cur_max>        1
55<mb_cur_min>        1
56<uconv_class>       "SBCS"
57<icu:state>         0-7f
58<subchar>           \x3F
59<icu:charsetFamily> "ASCII"
60
61CHARMAP
62<U0000> \x00 |1
63<U000A> \x0A |0
64<U000C> \x1B\x0A |0
65<U000D> \x0D |0
66<U0020> \x20 |0
67<U0020> \x1B |3
68<U0020> \x1B\x1B |3
69<U0021> \x21 |0
70<U0022> \x22 |0
71<U0023> \x23 |0
72<U0024> \x02 |0
73<U0025> \x25 |0
74<U0026> \x26 |0
75<U0027> \x27 |0
76<U0028> \x28 |0
77<U0029> \x29 |0
78<U002A> \x2A |0
79<U002B> \x2B |0
80<U002C> \x2C |0
81<U002D> \x2D |0
82<U002E> \x2E |0
83<U002F> \x2F |0
84<U0030> \x30 |0
85<U0031> \x31 |0
86<U0032> \x32 |0
87<U0033> \x33 |0
88<U0034> \x34 |0
89<U0035> \x35 |0
90<U0036> \x36 |0
91<U0037> \x37 |0
92<U0038> \x38 |0
93<U0039> \x39 |0
94<U003A> \x3A |0
95<U003B> \x3B |0
96<U003C> \x3C |0
97<U003D> \x3D |0
98<U003E> \x3E |0
99<U003F> \x3F |0
100<U0040> \x00 |0
101<U0041> \x41 |0
102<U0042> \x42 |0
103<U0043> \x43 |0
104<U0044> \x44 |0
105<U0045> \x45 |0
106<U0046> \x46 |0
107<U0047> \x47 |0
108<U0048> \x48 |0
109<U0049> \x49 |0
110<U004A> \x4A |0
111<U004B> \x4B |0
112<U004C> \x4C |0
113<U004D> \x4D |0
114<U004E> \x4E |0
115<U004F> \x4F |0
116<U0050> \x50 |0
117<U0051> \x51 |0
118<U0052> \x52 |0
119<U0053> \x53 |0
120<U0054> \x54 |0
121<U0055> \x55 |0
122<U0056> \x56 |0
123<U0057> \x57 |0
124<U0058> \x58 |0
125<U0059> \x59 |0
126<U005A> \x5A |0
127<U005B> \x1B\x3C |0
128<U005C> \x1B\x2F |0
129<U005D> \x1B\x3E |0
130<U005E> \x1B\x14 |0
131<U005F> \x11 |0
132<U0061> \x61 |0
133<U0062> \x62 |0
134<U0063> \x63 |0
135<U0064> \x64 |0
136<U0065> \x65 |0
137<U0066> \x66 |0
138<U0067> \x67 |0
139<U0068> \x68 |0
140<U0069> \x69 |0
141<U006A> \x6A |0
142<U006B> \x6B |0
143<U006C> \x6C |0
144<U006D> \x6D |0
145<U006E> \x6E |0
146<U006F> \x6F |0
147<U0070> \x70 |0
148<U0071> \x71 |0
149<U0072> \x72 |0
150<U0073> \x73 |0
151<U0074> \x74 |0
152<U0075> \x75 |0
153<U0076> \x76 |0
154<U0077> \x77 |0
155<U0078> \x78 |0
156<U0079> \x79 |0
157<U007A> \x7A |0
158<U007B> \x1B\x28 |0
159<U007C> \x1B\x40 |0
160<U007D> \x1B\x29 |0
161<U007E> \x1B\x3D |0
162<U00A1> \x40 |0
163<U00A3> \x01 |0
164<U00A4> \x24 |0
165<U00A5> \x03 |0
166<U00A7> \x5F |0
167<U00BF> \x60 |0
168<U00C4> \x5B |0
169<U00C5> \x0E |0
170<U00C6> \x1C |0
171<U00C7> \x09 |0
172<U00C9> \x1F |0
173<U00D1> \x5D |0
174<U00D6> \x5C |0
175<U00D8> \x0B |0
176<U00DC> \x5E |0
177<U00DF> \x1E |0
178<U00E0> \x7F |0
179<U00E4> \x7B |0
180<U00E5> \x0F |0
181<U00E6> \x1D |0
182<U00E8> \x04 |0
183<U00E9> \x05 |0
184<U00EC> \x07 |0
185<U00F1> \x7D |0
186<U00F2> \x08 |0
187<U00F6> \x7C |0
188<U00F8> \x0C |0
189<U00F9> \x06 |0
190<U00FC> \x7E |0
191<U0391> \x41 |1
192<U0392> \x42 |1
193<U0393> \x13 |0
194<U0394> \x10 |0
195<U0395> \x45 |1
196<U0396> \x5A |1
197<U0397> \x48 |1
198<U0398> \x19 |0
199<U0399> \x49 |1
200<U039A> \x4B |1
201<U039B> \x14 |0
202<U039C> \x4D |1
203<U039D> \x4E |1
204<U039E> \x1A |0
205<U039F> \x4F |1
206<U03A0> \x16 |0
207<U03A1> \x50 |1
208<U03A3> \x18 |0
209<U03A4> \x54 |1
210<U03A5> \x55 |1
211<U03A6> \x12 |0
212<U03A7> \x58 |1
213<U03A8> \x17 |0
214<U03A9> \x15 |0
215<U20AC> \x1B\x65 |0
216END CHARMAP
217