""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
import codecs
import sys

### Codec APIs

encode = codecs.utf_16_encode


def _native_encoder():
    """Return the BOM-less UTF-16 encoder matching ``sys.byteorder``."""
    if sys.byteorder == 'little':
        return codecs.utf_16_le_encode
    return codecs.utf_16_be_encode


def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string.

    Delegates to ``codecs.utf_16_decode`` with ``final=True`` and returns
    its ``(text, bytes_consumed)`` result unchanged.
    """
    return codecs.utf_16_decode(input, errors, True)


class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk is encoded with ``codecs.utf_16_encode``; every later
    chunk goes through the little- or big-endian encoder chosen from
    ``sys.byteorder``.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the resulting bytes."""
        if self.encoder is None:
            result = codecs.utf_16_encode(input, self.errors)[0]
            self.encoder = _native_encoder()
            return result
        return self.encoder(input, self.errors)[0]

    def reset(self):
        """Forget the chosen encoder so the next chunk is a "first" chunk."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return 2 if no chunk has been encoded yet, otherwise 0."""
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return (2 if self.encoder is None else 0)

    def setstate(self, state):
        """Restore a state previously produced by ``getstate()``.

        Any truthy *state* clears the encoder; a falsy one selects the
        platform-order encoder.
        """
        if state:
            self.encoder = None
        else:
            self.encoder = _native_encoder()


class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM."""

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None until a BOM has selected the LE or BE decoder.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input*, returning ``(text, bytes_consumed)``.

        Raises:
            UnicodeError: if at least two bytes were consumed and no BOM
                was reported by ``codecs.utf_16_ex_decode``.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_16_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_16_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_16_be_decode
            elif consumed >= 2:
                raise UnicodeError("UTF-16 stream does not start with BOM")
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Drop buffered input and the selected byte-order decoder."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None


class StreamWriter(codecs.StreamWriter):
    """Stream writer whose first encode call uses ``codecs.utf_16_encode``
    and whose later calls use the platform-order encoder."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None until the first encode() call.
        self.encoder = None

    def reset(self):
        """Reset the writer so the next encode() is treated as the first."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input* and return the underlying codec function's result."""
        if self.encoder is None:
            result = codecs.utf_16_encode(input, errors)
            self.encoder = _native_encoder()
            return result
        return self.encoder(input, errors)


class StreamReader(codecs.StreamReader):
    """Stream reader that selects its byte order from the BOM."""

    def reset(self):
        """Reset the reader and remove any instance-level ``decode``."""
        codecs.StreamReader.reset(self)
        # decode() below may have installed an instance attribute that
        # shadows the method; deleting it restores BOM detection.
        try:
            del self.decode
        except AttributeError:
            pass

    def decode(self, input, errors='strict'):
        """Decode *input*, returning ``(text, bytes_consumed)``.

        Once a BOM is seen, ``self.decode`` is rebound on the instance to
        the matching little- or big-endian decoder, so later calls bypass
        this method.

        Raises:
            UnicodeError: if at least two bytes were consumed and no BOM
                was reported by ``codecs.utf_16_ex_decode``.
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            # Call form (not the Python-2-only ``raise E, msg``) so the
            # module parses on both Python 2 and Python 3.
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)


### encodings module API

def getregentry():
    """Return the ``codecs.CodecInfo`` registry entry for 'utf-16'."""
    return codecs.CodecInfo(
        name='utf-16',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )