"""
Python 'utf-32' Codec
"""
import codecs
import sys

### Codec APIs

encode = codecs.utf_32_encode

def decode(input, errors='strict'):
    """Decode a complete UTF-32 byte string.

    Delegates to codecs.utf_32_decode, passing True as its third
    argument, and returns that function's result unchanged.
    """
    return codecs.utf_32_decode(input, errors, True)

class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-32 encoder.

    The first chunk is encoded with codecs.utf_32_encode; every later
    chunk is encoded with the byte-order specific encoder that matches
    sys.byteorder.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None means nothing has been encoded since creation/reset.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the encoded data.

        *final* is accepted for interface compatibility but is not used.
        """
        if self.encoder is None:
            result = codecs.utf_32_encode(input, self.errors)[0]
            # Switch to the native-order encoder for all later chunks.
            if sys.byteorder == 'little':
                self.encoder = codecs.utf_32_le_encode
            else:
                self.encoder = codecs.utf_32_be_encode
            return result
        return self.encoder(input, self.errors)[0]

    def reset(self):
        """Forget the selected encoder so the next chunk is a first chunk."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return 2 if no encoder has been selected yet, otherwise 0."""
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return (2 if self.encoder is None else 0)

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any true value clears the encoder; a false value selects the
        encoder matching sys.byteorder.
        """
        if state:
            self.encoder = None
        else:
            if sys.byteorder == 'little':
                self.encoder = codecs.utf_32_le_encode
            else:
                self.encoder = codecs.utf_32_be_encode

class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-32 decoder that picks its byte order from the BOM."""

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None until the byte order has been determined.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input* and return a (output, consumed) tuple.

        Raises UnicodeError when at least four bytes were consumed but
        no byte order was reported by codecs.utf_32_ex_decode.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_32_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_32_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_32_be_decode
            elif consumed >= 4:
                raise UnicodeError("UTF-32 stream does not start with BOM")
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Drop buffered input and the selected byte-order decoder."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return (buffered input, byte-order flag) for this decoder."""
        # additional state info from the base class must be None here,
        # as it isn't passed along to the caller
        state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        # additional state info we pass to the caller:
        # 0: stream is in natural order for this platform
        # 1: stream is in unnatural order
        # 2: endianness hasn't been determined yet
        if self.decoder is None:
            return (state, 2)
        addstate = int((sys.byteorder == "big") !=
                       (self.decoder is codecs.utf_32_be_decode))
        return (state, addstate)

    def setstate(self, state):
        """Restore a state tuple previously returned by getstate()."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        state = state[1]
        if state == 0:
            self.decoder = (codecs.utf_32_be_decode
                            if sys.byteorder == "big"
                            else codecs.utf_32_le_decode)
        elif state == 1:
            self.decoder = (codecs.utf_32_le_decode
                            if sys.byteorder == "big"
                            else codecs.utf_32_be_decode)
        else:
            self.decoder = None

class StreamWriter(codecs.StreamWriter):
    """Stream writer using the same first-chunk logic as IncrementalEncoder."""

    def __init__(self, stream, errors='strict'):
        # Set before the base-class initializer runs, as in the original.
        self.encoder = None
        codecs.StreamWriter.__init__(self, stream, errors)

    def reset(self):
        """Forget the selected encoder so the next write is a first write."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input* and return the underlying encoder's result tuple."""
        if self.encoder is None:
            result = codecs.utf_32_encode(input, errors)
            # Later calls use the native-order encoder.
            if sys.byteorder == 'little':
                self.encoder = codecs.utf_32_le_encode
            else:
                self.encoder = codecs.utf_32_be_encode
            return result
        else:
            return self.encoder(input, errors)

class StreamReader(codecs.StreamReader):
    """Stream reader that picks its byte order from the BOM."""

    def reset(self):
        """Reset the reader and remove any per-instance decode override."""
        codecs.StreamReader.reset(self)
        try:
            # decode() below may have stored a byte-order specific decoder
            # on the instance; deleting it re-exposes the class method.
            del self.decode
        except AttributeError:
            pass

    def decode(self, input, errors='strict'):
        """Decode *input* and return a (output, consumed) tuple.

        Once a byte order is reported, the matching decoder is stored on
        the instance as ``decode`` so later calls bypass this method.

        Raises UnicodeError when at least four bytes were consumed but
        no byte order was reported by codecs.utf_32_ex_decode.
        """
        (output, consumed, byteorder) = \
            codecs.utf_32_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_32_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_32_be_decode
        elif consumed >= 4:
            # Call-style raise is valid on both Python 2 and Python 3.
            raise UnicodeError("UTF-32 stream does not start with BOM")
        return (output, consumed)

### encodings module API

def getregentry():
    """Return the codecs.CodecInfo entry describing this codec."""
    return codecs.CodecInfo(
        name='utf-32',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )