""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
import codecs
import sys

### Codec APIs

encode = codecs.utf_16_encode


def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in one call.

    Delegates to codecs.utf_16_decode with final=True and returns
    whatever that function returns.
    """
    return codecs.utf_16_decode(input, errors, True)


def _native_encoder():
    """Return the BOM-less UTF-16 encoder for this platform's byte order."""
    if sys.byteorder == 'little':
        return codecs.utf_16_le_encode
    return codecs.utf_16_be_encode


class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk goes through codecs.utf_16_encode; every later chunk
    goes through the platform-order encoder picked by _native_encoder().
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode one chunk of text and return the encoded data."""
        if self.encoder is None:
            # First chunk: use the generic encoder, then switch to the
            # platform-order encoder for everything that follows.
            result = codecs.utf_16_encode(input, self.errors)[0]
            self.encoder = _native_encoder()
            return result
        return self.encoder(input, self.errors)[0]

    def reset(self):
        """Forget the selected encoder so the next chunk is a first chunk."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return 2 if no encoder has been selected yet, otherwise 0."""
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return (2 if self.encoder is None else 0)

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any true value clears the selected encoder; a false value selects
        the platform-order encoder.
        """
        if state:
            self.encoder = None
        else:
            self.encoder = _native_encoder()


class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM."""

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None until a byte order has been detected.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode buffered input and return (output, consumed).

        Raises UnicodeError when at least two bytes were consumed and
        utf_16_ex_decode reported neither -1 nor 1 as the byte order.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_16_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_16_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_16_be_decode
            elif consumed >= 2:
                raise UnicodeError("UTF-16 stream does not start with BOM")
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Drop the detected byte order along with the base-class state."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None


class StreamWriter(codecs.StreamWriter):
    """Stream writer whose first encode call uses codecs.utf_16_encode."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    def reset(self):
        """Forget the selected encoder so the next chunk is a first chunk."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode input and return the underlying encoder's result tuple."""
        if self.encoder is None:
            result = codecs.utf_16_encode(input, errors)
            self.encoder = _native_encoder()
            return result
        return self.encoder(input, errors)


class StreamReader(codecs.StreamReader):
    """Stream reader that replaces its own decode() once a BOM is seen."""

    def reset(self):
        """Reset the reader and restore the BOM-detecting decode()."""
        codecs.StreamReader.reset(self)
        # decode() may have been shadowed by an instance attribute; deleting
        # it makes the class-level method visible again.
        try:
            del self.decode
        except AttributeError:
            pass

    def decode(self, input, errors='strict'):
        """Decode input, detecting the byte order, and return (text, consumed).

        When a byte order is detected, the matching fixed-order decoder is
        stored on the instance as ``decode`` so later calls bypass this
        method.  Raises UnicodeError when at least two bytes were consumed
        and utf_16_ex_decode reported neither -1 nor 1 as the byte order.
        """
        (text, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, consumed)


### encodings module API

def getregentry():
    """Return the codecs.CodecInfo entry describing the 'utf-16' codec."""
    return codecs.CodecInfo(
        name='utf-16',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )