"""
Python 'utf-32' Codec

Generic UTF-32 codec: the byte order of incoming data is taken from its
byte order mark (BOM); outgoing data is written in the platform's native
byte order.
"""
import codecs
import sys

### Codec APIs

# Stateless encoder: delegates directly to the generic UTF-32 encoder.
encode = codecs.utf_32_encode


def decode(input, errors='strict'):
    """Decode a complete UTF-32 byte string.

    Calls codecs.utf_32_decode with ``final=True``, i.e. *input* is
    treated as the whole message rather than a partial chunk.

    Returns whatever codecs.utf_32_decode returns (a
    ``(text, bytes_consumed)`` tuple).
    """
    return codecs.utf_32_decode(input, errors, True)


class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-32 encoder.

    ``self.encoder`` is None until the first chunk has been encoded.
    The first chunk goes through the generic codecs.utf_32_encode; every
    later chunk goes through the explicit native-endian encoder so that
    the stream start is only handled once.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None means "nothing encoded yet since construction/reset".
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the encoded bytes.

        *final* is accepted for interface compatibility and is not used.
        """
        if self.encoder is None:
            # First chunk: use the generic encoder, then lock in the
            # platform-native byte order for all following chunks.
            result = codecs.utf_32_encode(input, self.errors)[0]
            if sys.byteorder == 'little':
                self.encoder = codecs.utf_32_le_encode
            else:
                self.encoder = codecs.utf_32_be_encode
            return result
        return self.encoder(input, self.errors)[0]

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer (0 or 2)."""
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return (2 if self.encoder is None else 0)

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any truthy *state* means "not started yet"; a falsy state selects
        the platform-native encoder.
        """
        if state:
            self.encoder = None
        else:
            if sys.byteorder == 'little':
                self.encoder = codecs.utf_32_le_encode
            else:
                self.encoder = codecs.utf_32_be_encode


class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-32 decoder that picks its byte order from the BOM.

    ``self.decoder`` is None until a byte order has been detected, after
    which it is codecs.utf_32_le_decode or codecs.utf_32_be_decode.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None means "byte order not determined yet".
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input*, detecting the byte order on the first data.

        Returns ``(output, consumed)``.

        Raises UnicodeError if at least 4 bytes were consumed without a
        byte order having been detected.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_32_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_32_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_32_be_decode
            elif consumed >= 4:
                # A full code unit was consumed but no BOM was reported.
                raise UnicodeError("UTF-32 stream does not start with BOM")
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_state, byte_order_flag)``; see flags below."""
        # additional state info from the base class must be None here,
        # as it isn't passed along to the caller
        state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        # additional state info we pass to the caller:
        # 0: stream is in natural order for this platform
        # 1: stream is in unnatural order
        # 2: endianness hasn't been determined yet
        if self.decoder is None:
            return (state, 2)
        addstate = int((sys.byteorder == "big") !=
                       (self.decoder is codecs.utf_32_be_decode))
        return (state, addstate)

    def setstate(self, state):
        """Restore a ``(buffered_state, byte_order_flag)`` pair.

        Flag 0 selects the native-order decoder, 1 the opposite order,
        and any other value marks the byte order as undetermined.
        """
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        state = state[1]
        if state == 0:
            self.decoder = (codecs.utf_32_be_decode
                            if sys.byteorder == "big"
                            else codecs.utf_32_le_decode)
        elif state == 1:
            self.decoder = (codecs.utf_32_le_decode
                            if sys.byteorder == "big"
                            else codecs.utf_32_be_decode)
        else:
            self.decoder = None


class StreamWriter(codecs.StreamWriter):
    """Stream writer counterpart of IncrementalEncoder.

    The first encode() call uses the generic codecs.utf_32_encode; later
    calls use the platform-native-endian encoder.
    """

    def __init__(self, stream, errors='strict'):
        # Set before the base __init__ so the attribute always exists.
        self.encoder = None
        codecs.StreamWriter.__init__(self, stream, errors)

    def reset(self):
        """Return to the initial state (no data encoded yet)."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input*; returns the underlying encoder's result tuple."""
        if self.encoder is None:
            result = codecs.utf_32_encode(input, errors)
            if sys.byteorder == 'little':
                self.encoder = codecs.utf_32_le_encode
            else:
                self.encoder = codecs.utf_32_be_encode
            return result
        else:
            return self.encoder(input, errors)


class StreamReader(codecs.StreamReader):
    """Stream reader that picks its byte order from the BOM.

    Once the byte order is known, the instance attribute ``decode`` is
    rebound to the matching endian-specific decoder, shadowing the method
    below; reset() removes that instance attribute again.
    """

    def reset(self):
        """Reset buffers and re-enable byte order detection."""
        codecs.StreamReader.reset(self)
        try:
            # Remove the instance-level override installed by decode().
            del self.decode
        except AttributeError:
            # No byte order had been detected yet; nothing to undo.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input*, detecting the byte order from the data.

        Returns ``(output, consumed)``.

        Raises UnicodeError if at least 4 bytes were consumed without a
        byte order having been detected.
        """
        (output, consumed, byteorder) = \
            codecs.utf_32_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_32_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_32_be_decode
        elif consumed >= 4:
            # Call form of raise: valid on both Python 2 and Python 3, and
            # consistent with IncrementalDecoder._buffer_decode.
            raise UnicodeError("UTF-32 stream does not start with BOM")
        return (output, consumed)


### encodings module API

def getregentry():
    """Return the codecs.CodecInfo entry describing the 'utf-32' codec."""
    return codecs.CodecInfo(
        name='utf-32',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )