""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
import codecs
import sys

### Codec APIs

# Stateless encoder: codecs.utf_16_encode is exposed unchanged under the
# module-level name looked up by getregentry().
encode = codecs.utf_16_encode

def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in one call.

    input  -- the encoded bytes.
    errors -- error handling scheme forwarded to codecs.utf_16_decode.

    The call always passes True as the third ("final") argument, i.e. the
    input is handed over as the whole message rather than as one chunk of
    a longer stream.  Returns whatever codecs.utf_16_decode returns (the
    decoded text together with the number of bytes consumed).
    """
    return codecs.utf_16_decode(input, errors, True)

class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk is encoded with codecs.utf_16_encode; every later
    chunk is encoded with the explicit little- or big-endian encoder that
    matches sys.byteorder.  self.encoder is None until the first chunk has
    been written and holds the byte-order specific encoder afterwards.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        self.encoder = None

    @staticmethod
    def _native_encoder():
        """Return the byte-order specific encoder matching sys.byteorder."""
        if sys.byteorder == 'little':
            return codecs.utf_16_le_encode
        return codecs.utf_16_be_encode

    def encode(self, input, final=False):
        """Encode one chunk of text and return the encoded bytes."""
        if self.encoder is None:
            # Encode first and switch encoders afterwards, so a failing
            # first chunk leaves the encoder in its initial state.
            result = codecs.utf_16_encode(input, self.errors)[0]
            self.encoder = self._native_encoder()
            return result
        return self.encoder(input, self.errors)[0]

    def reset(self):
        """Return to the initial state (no chunk written yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return (2 if self.encoder is None else 0)

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any true value means "nothing written yet"; a false value (0)
        means the stream is already in native byte order.
        """
        if state:
            self.encoder = None
        else:
            self.encoder = self._native_encoder()

class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM.

    self.decoder is None until a byte order has been detected; afterwards
    it is codecs.utf_16_le_decode or codecs.utf_16_be_decode and all
    further input is decoded with it.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode input and return (text, number_of_bytes_consumed).

        Raises UnicodeError when no byte order was detected although at
        least two bytes were consumed.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_16_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_16_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_16_be_decode
            elif consumed >= 2:
                raise UnicodeError("UTF-16 stream does not start with BOM")
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return (buffered_input, flag) describing the decoder state.

        flag values:
        0: stream is in natural order for this platform
        1: stream is in unnatural order
        2: endianness hasn't been determined yet
        """
        # Only the buffered input is taken from the base class state; the
        # second slot is replaced by our own byte-order flag.
        buffered = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        if self.decoder is None:
            return (buffered, 2)
        unnatural = ((sys.byteorder == 'big') !=
                     (self.decoder is codecs.utf_16_be_decode))
        return (buffered, int(unnatural))

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # The base class restores the buffered input from state[0].
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        native_is_big = (sys.byteorder == 'big')
        if flag == 0:
            self.decoder = (codecs.utf_16_be_decode if native_is_big
                            else codecs.utf_16_le_decode)
        elif flag == 1:
            self.decoder = (codecs.utf_16_le_decode if native_is_big
                            else codecs.utf_16_be_decode)
        else:
            self.decoder = None

class StreamWriter(codecs.StreamWriter):
    """UTF-16 stream writer.

    The first encode() call after construction or reset() goes through
    codecs.utf_16_encode; later calls use the explicit little- or
    big-endian encoder that matches sys.byteorder.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode input and return the underlying encoder's result tuple."""
        if self.encoder is None:
            # Encode first and switch encoders afterwards, so a failing
            # first chunk leaves the writer in its initial state.
            result = codecs.utf_16_encode(input, errors)
            if sys.byteorder == 'little':
                self.encoder = codecs.utf_16_le_encode
            else:
                self.encoder = codecs.utf_16_be_encode
            return result
        else:
            return self.encoder(input, errors)

class StreamReader(codecs.StreamReader):
    """UTF-16 stream reader that picks its byte order from the BOM.

    decode() starts out as the method below.  Once a byte order has been
    detected it stores the matching codec function as an instance
    attribute named ``decode``, which takes over all later calls.
    """

    def reset(self):
        """Reset the reader and re-enable byte-order detection."""
        codecs.StreamReader.reset(self)
        try:
            # Remove the instance-level override so the class-level
            # decode() below is used again.
            del self.decode
        except AttributeError:
            # No byte order had been detected yet; nothing to undo.
            pass

    def decode(self, input, errors='strict'):
        """Decode input and return (text, number_of_bytes_consumed).

        Raises UnicodeError when no byte order was detected although at
        least two bytes were consumed.
        """
        (text, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, consumed)


### encodings module API

def getregentry():
    """Return the codecs.CodecInfo entry describing the 'utf-16' codec.

    The entry bundles this module's stateless encode/decode functions with
    its incremental and stream encoder/decoder classes.
    """
    return codecs.CodecInfo(
        name='utf-16',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )