""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
9edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepimport codecs, sys
10edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
11edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### Codec APIs
12edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# Stateless encoding is delegated directly to the codecs-level function.
encode = codecs.utf_16_encode

def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in a single call.

    input  -- the encoded bytes to decode.
    errors -- error handling scheme name forwarded to the decoder.

    Returns the result of codecs.utf_16_decode unchanged (a tuple of the
    decoded text and the number of bytes consumed).
    """
    # Stateless decoding always sees the whole input, so the third
    # argument of codecs.utf_16_decode is fixed to True here.
    final = True
    return codecs.utf_16_decode(input, errors, final)
17edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk is encoded with codecs.utf_16_encode; every later
    chunk is encoded with the little- or big-endian encoder that matches
    sys.byteorder.  ``self.encoder`` is None until the first chunk has been
    produced.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None: nothing has been encoded yet since construction/reset.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the encoded bytes.

        *final* is accepted for interface compatibility and is not used.
        """
        if self.encoder is not None:
            return self.encoder(input, self.errors)[0]

        # First chunk.  Encode before switching encoders so that a failing
        # encode leaves this object in its initial state.
        first_chunk = codecs.utf_16_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return first_chunk

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0: stream is in natural order for this platform
        2: endianness hasn't been determined yet
        (output is never written in unnatural order)
        """
        if self.encoder is None:
            return 2
        return 0

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any true value means "nothing encoded yet"; a false value selects
        the encoder matching this platform's byte order.
        """
        if state:
            self.encoder = None
            return
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
52edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks the byte order from the BOM.

    ``self.decoder`` is None until a BOM has been seen; afterwards it holds
    the byte-order specific decode function used for the remainder of the
    stream.  getstate()/setstate() carry that choice along with the
    buffered bytes so that a saved state can be restored faithfully.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None: byte order has not been determined yet.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode as much of *input* as possible.

        Returns a tuple (decoded text, number of bytes consumed).

        Raises UnicodeError if at least two bytes were consumed without a
        BOM having been found at the start of the stream.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_16_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_16_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_16_be_decode
            elif consumed >= 2:
                raise UnicodeError("UTF-16 stream does not start with BOM")
            # With fewer than two bytes consumed the byte order is still
            # undetermined; keep self.decoder as None and try again later.
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Forget buffered bytes and the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return (buffered bytes, byte-order flag).

        The flag is:
        0: stream is in natural order for this platform
        1: stream is in unnatural order
        2: endianness hasn't been determined yet
        """
        # Only the buffered bytes are taken from the base class; its own
        # additional state value is replaced by the byte-order flag.
        buffered = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        if self.decoder is None:
            return (buffered, 2)
        unnatural = ((sys.byteorder == "big") !=
                     (self.decoder is codecs.utf_16_be_decode))
        return (buffered, int(unnatural))

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # The base class restores the buffered bytes from state[0].
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        if flag == 0:
            self.decoder = (codecs.utf_16_be_decode
                            if sys.byteorder == "big"
                            else codecs.utf_16_le_decode)
        elif flag == 1:
            self.decoder = (codecs.utf_16_le_decode
                            if sys.byteorder == "big"
                            else codecs.utf_16_be_decode)
        else:
            self.decoder = None
74edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class StreamWriter(codecs.StreamWriter):
    """UTF-16 stream writer.

    The first encode() call after construction or reset() goes through
    codecs.utf_16_encode; subsequent calls use the little- or big-endian
    encoder matching sys.byteorder.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None: nothing has been encoded yet for this stream.
        self.encoder = None

    def reset(self):
        """Return to the initial state (nothing encoded yet)."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input*; returns the tuple produced by the encoder."""
        if self.encoder is not None:
            return self.encoder(input, errors)

        # First chunk.  Encode before selecting the follow-up encoder so a
        # failing encode leaves the writer in its initial state.
        encoded = codecs.utf_16_encode(input, errors)
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return encoded
94edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class StreamReader(codecs.StreamReader):
    """UTF-16 stream reader that picks the byte order from the BOM.

    The first successful BOM detection replaces ``decode`` on the instance
    with the byte-order specific decode function; reset() removes that
    override so the next read looks for a BOM again.
    """

    def reset(self):
        """Reset the reader and forget the detected byte order."""
        codecs.StreamReader.reset(self)
        try:
            # Remove the per-instance override installed by decode().
            del self.decode
        except AttributeError:
            # No override installed yet: nothing to undo.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input*, detecting the byte order from a leading BOM.

        Returns a tuple (decoded text, number of bytes consumed).

        Raises UnicodeError if at least two bytes were consumed without a
        BOM having been found.
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)
114edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
115edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### encodings module API
116edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def getregentry():
    """Return the codecs.CodecInfo record for the 'utf-16' codec.

    The record bundles this module's stateless encode/decode functions
    with its incremental and stream classes.
    """
    codec_info = codecs.CodecInfo(
        encode=encode,
        decode=decode,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
        incrementaldecoder=IncrementalDecoder,
        incrementalencoder=IncrementalEncoder,
        name='utf-16',
    )
    return codec_info
127