""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
import codecs
import sys

### Codec APIs

# Stateless encoder: a direct alias for the C-level UTF-16 encoder.
encode = codecs.utf_16_encode
1483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in one shot.

    The input is passed to ``codecs.utf_16_decode`` with its third
    argument set to True, i.e. it is treated as the final (complete)
    chunk rather than as part of a stream.

    Returns whatever ``codecs.utf_16_decode`` returns: a
    ``(decoded_text, bytes_consumed)`` tuple.
    """
    return codecs.utf_16_decode(input, errors, True)
1783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk is encoded with ``codecs.utf_16_encode``; every later
    chunk is encoded with the little- or big-endian encoder matching
    ``sys.byteorder``.  ``self.encoder`` is None until the first chunk
    has been encoded.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None means "nothing encoded yet"; the next encode() call uses
        # the generic utf_16 encoder.
        self.encoder = None

    @staticmethod
    def _native_encoder():
        """Return the UTF-16 encoder function for this platform's byte order."""
        if sys.byteorder == 'little':
            return codecs.utf_16_le_encode
        return codecs.utf_16_be_encode

    def encode(self, input, final=False):
        """Encode *input* and return the encoded data.

        *final* is accepted for interface compatibility and is not used.
        """
        if self.encoder is not None:
            return self.encoder(input, self.errors)[0]
        # Encode first so that a failing encode leaves the encoder unset
        # and the next attempt still goes through the generic encoder.
        result = codecs.utf_16_encode(input, self.errors)[0]
        self.encoder = self._native_encoder()
        return result

    def reset(self):
        """Forget the selected encoder so the next chunk starts afresh."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer."""
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return (2 if self.encoder is None else 0)

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any truthy *state* means "not started yet"; a falsy state selects
        the platform-native encoder.
        """
        if state:
            self.encoder = None
        else:
            self.encoder = self._native_encoder()
5283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that detects byte order from the BOM.

    ``self.decoder`` is None until a BOM has been seen; afterwards it is
    the little- or big-endian decoder function selected by that BOM.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None means the byte order has not been determined yet.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input* and return ``(output, bytes_consumed)``.

        Raises UnicodeError when at least two bytes were consumed without
        a byte order having been reported by the BOM-detecting decoder.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_16_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_16_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_16_be_decode
            elif consumed >= 2:
                raise UnicodeError("UTF-16 stream does not start with BOM")
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_input, byte_order_flag)``.

        Without this override the inherited state carries no byte-order
        information, so a decoder restored from a saved state would ask
        for a BOM again.
        """
        # Only the buffered input is taken from the base class; the
        # second slot is replaced with our own flag.
        state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        # additional state info we pass to the caller:
        # 0: stream is in natural order for this platform
        # 1: stream is in unnatural order
        # 2: endianness hasn't been determined yet
        if self.decoder is None:
            return (state, 2)
        addstate = int((sys.byteorder == "big") !=
                       (self.decoder is codecs.utf_16_be_decode))
        return (state, addstate)

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # The base class restores the buffered input from state[0].
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        if flag == 0:
            self.decoder = (codecs.utf_16_be_decode
                            if sys.byteorder == "big"
                            else codecs.utf_16_le_decode)
        elif flag == 1:
            self.decoder = (codecs.utf_16_le_decode
                            if sys.byteorder == "big"
                            else codecs.utf_16_be_decode)
        else:
            self.decoder = None
7483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class StreamWriter(codecs.StreamWriter):
    """UTF-16 stream writer.

    The first chunk written after construction or reset() is encoded
    with ``codecs.utf_16_encode``; later chunks use the little- or
    big-endian encoder matching ``sys.byteorder``.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    def reset(self):
        """Forget the selected encoder so the next write starts afresh."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input* and return the underlying encoder's result tuple."""
        if self.encoder is not None:
            return self.encoder(input, errors)
        # Encode before selecting the follow-up encoder, so a failed
        # first encode leaves the writer in its initial state.
        result = codecs.utf_16_encode(input, errors)
        if sys.byteorder == 'little':
            self.encoder = codecs.utf_16_le_encode
        else:
            self.encoder = codecs.utf_16_be_encode
        return result
9483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class StreamReader(codecs.StreamReader):
    """UTF-16 stream reader that detects byte order from the BOM.

    Once a BOM has been seen, ``decode`` is replaced on the instance by
    the matching little- or big-endian decoder function; reset() removes
    that instance attribute so the detecting method is used again.
    """

    def reset(self):
        """Reset the reader and re-enable BOM detection."""
        codecs.StreamReader.reset(self)
        try:
            # Remove the per-instance decoder installed by decode(), if
            # any, so the class-level method is visible again.
            del self.decode
        except AttributeError:
            # No byte order had been detected yet; nothing to undo.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input* and return ``(output, bytes_consumed)``.

        Raises UnicodeError when at least two bytes were consumed without
        a byte order having been reported by the BOM-detecting decoder.
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)

### encodings module API

def getregentry():
    """Return the ``codecs.CodecInfo`` record describing this codec.

    The record bundles this module's stateless ``encode``/``decode``
    functions with its incremental and stream classes under the name
    'utf-16'.
    """
    return codecs.CodecInfo(
        name='utf-16',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
127