""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
import codecs, sys

### Codec APIs

# Stateless encoder: delegates directly to the C implementation.
encode = codecs.utf_16_encode

def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in one call.

    The third argument (final=True) tells the underlying decoder that
    no further data follows this input.

    Returns whatever codecs.utf_16_decode returns: a
    (decoded_text, bytes_consumed) tuple.
    """
    return codecs.utf_16_decode(input, errors, True)

class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk is encoded with codecs.utf_16_encode; every later
    chunk is encoded with the byte-order-specific encoder that matches
    sys.byteorder, so the stream is always written in the platform's
    natural order.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None until the first chunk has been encoded; afterwards the
        # byte-order-specific encoder function used for later chunks.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode one chunk of text and return the encoded bytes.

        `final` is accepted for interface compatibility and is not used.
        """
        if self.encoder is None:
            # First chunk: use the generic UTF-16 encoder, then switch
            # to the platform-order encoder for everything that follows.
            result = codecs.utf_16_encode(input, self.errors)[0]
            if sys.byteorder == 'little':
                self.encoder = codecs.utf_16_le_encode
            else:
                self.encoder = codecs.utf_16_be_encode
            return result
        return self.encoder(input, self.errors)[0]

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer flag."""
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return (2 if self.encoder is None else 0)

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any true value means "nothing encoded yet"; a false value
        selects the platform-order encoder.
        """
        if state:
            self.encoder = None
        else:
            if sys.byteorder == 'little':
                self.encoder = codecs.utf_16_le_encode
            else:
                self.encoder = codecs.utf_16_be_encode

class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that detects byte order from the BOM.

    Until a byte order has been detected, input is decoded with
    codecs.utf_16_ex_decode; afterwards the matching byte-order-specific
    decoder is used for the rest of the stream.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None until the byte order is known; afterwards either
        # codecs.utf_16_le_decode or codecs.utf_16_be_decode.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode as much of `input` as possible.

        Returns a (decoded_text, bytes_consumed) tuple.

        Raises UnicodeError if at least two bytes were consumed and no
        byte order was reported by the decoder.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_16_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_16_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_16_be_decode
            elif consumed >= 2:
                raise UnicodeError("UTF-16 stream does not start with BOM")
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return (buffered_bytes, byte_order_flag).

        The inherited getstate() only reports the buffered bytes, which
        loses the detected byte order; the flag returned here lets
        setstate() restore it.
        """
        # Only the buffered bytes are taken from the base class; its
        # second tuple item is replaced by our own flag.
        pending = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        # additional state info we pass to the caller:
        # 0: stream is in natural order for this platform
        # 1: stream is in unnatural order
        # 2: endianness hasn't been determined yet
        if self.decoder is None:
            return (pending, 2)
        stream_is_big = self.decoder is codecs.utf_16_be_decode
        platform_is_big = sys.byteorder == "big"
        return (pending, int(stream_is_big != platform_is_big))

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # The base class restores the buffered bytes from state[0].
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        platform_is_big = sys.byteorder == "big"
        if flag == 0:
            # Natural order for this platform.
            self.decoder = (codecs.utf_16_be_decode if platform_is_big
                            else codecs.utf_16_le_decode)
        elif flag == 1:
            # Unnatural (byte-swapped) order for this platform.
            self.decoder = (codecs.utf_16_le_decode if platform_is_big
                            else codecs.utf_16_be_decode)
        else:
            # Byte order not yet determined: look for a BOM again.
            self.decoder = None

class StreamWriter(codecs.StreamWriter):
    """Stream writer for UTF-16.

    The first write is encoded with codecs.utf_16_encode; later writes
    use the byte-order-specific encoder that matches sys.byteorder.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode `input` and return the underlying encoder's result tuple."""
        if self.encoder is None:
            # First chunk: use the generic UTF-16 encoder, then switch
            # to the platform-order encoder for everything that follows.
            result = codecs.utf_16_encode(input, errors)
            if sys.byteorder == 'little':
                self.encoder = codecs.utf_16_le_encode
            else:
                self.encoder = codecs.utf_16_be_encode
            return result
        else:
            return self.encoder(input, errors)

class StreamReader(codecs.StreamReader):
    """Stream reader for UTF-16 that detects byte order from the BOM.

    Once a byte order has been detected, decode() replaces itself on the
    instance with the matching byte-order-specific decoder function.
    """

    def reset(self):
        """Reset the reader and go back to BOM detection."""
        codecs.StreamReader.reset(self)
        try:
            # Remove the instance-level override installed by decode()
            # so that the class-level decode() is used again.
            del self.decode
        except AttributeError:
            # No override installed: byte order was never detected.
            pass

    def decode(self, input, errors='strict'):
        """Decode `input`, detecting the byte order on the way.

        Returns a (decoded_text, bytes_consumed) tuple.

        Raises UnicodeError if at least two bytes were consumed and no
        byte order was reported by the decoder.
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)

### encodings module API

def getregentry():
    """Return the codecs.CodecInfo entry describing the 'utf-16' codec."""
    return codecs.CodecInfo(
        name='utf-16',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )