""" codecs -- Python Codec Registry, API and helpers.


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"

import __builtin__, sys

### Registry and builtin stateless codec functions

# The star import is deliberate: it pulls the names of the builtin
# _codecs module into this namespace.  NOTE(review): names listed in
# __all__ such as "register" and "lookup" are not defined in the visible
# part of this module, so they presumably come from this import -- confirm.
try:
    from _codecs import *
except ImportError as why:
    # "except ... as ..." is accepted from Python 2.6 on, which this file
    # already requires because it uses b"" literals.
    raise SystemError('Failed to load the builtin codecs: %s' % why)

__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
           "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
           "strict_errors", "ignore_errors", "replace_errors",
           "xmlcharrefreplace_errors",
           "register_error", "lookup_error"]

### Constants

#
# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
# and its possible byte string values
# for UTF8/UTF16/UTF32 output and little/big endian machines
#

# UTF-8
BOM_UTF8 = '\xef\xbb\xbf'

# UTF-16, little endian
BOM_LE = BOM_UTF16_LE = '\xff\xfe'

# UTF-16, big endian
BOM_BE = BOM_UTF16_BE = '\xfe\xff'

# UTF-32, little endian
BOM_UTF32_LE = '\xff\xfe\x00\x00'

# UTF-32, big endian
BOM_UTF32_BE = '\x00\x00\xfe\xff'
# Select the native-endianness aliases once, at import time, from the
# byte order reported by the interpreter.
if sys.byteorder == 'little':

    # UTF-16, native endianness
    BOM = BOM_UTF16 = BOM_UTF16_LE

    # UTF-32, native endianness
    BOM_UTF32 = BOM_UTF32_LE

else:

    # UTF-16, native endianness
    BOM = BOM_UTF16 = BOM_UTF16_BE

    # UTF-32, native endianness
    BOM_UTF32 = BOM_UTF32_BE

# Old broken names (don't use in new code).
# The numbers in these names are misleading: BOM32_* are bound to the
# UTF-16 marks and BOM64_* to the UTF-32 marks.  They are kept only so
# that existing importers keep working.
BOM32_LE = BOM_UTF16_LE
BOM32_BE = BOM_UTF16_BE
BOM64_LE = BOM_UTF32_LE
BOM64_BE = BOM_UTF32_BE


### Codec base classes (defining the API)

class CodecInfo(tuple):

    """ Describes one codec: its stateless functions and its factories.

        The instance itself is the 4-tuple (encode, decode, streamreader,
        streamwriter).  Every constructor argument is additionally
        stored as an attribute of the same name.

    """

    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
                incrementalencoder=None, incrementaldecoder=None, name=None):
        # Only the first four values are part of the tuple; the
        # incremental factories and the name are attributes only.
        self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
        self.name = name
        self.encode = encode
        self.decode = decode
        self.incrementalencoder = incrementalencoder
        self.incrementaldecoder = incrementaldecoder
        self.streamwriter = streamwriter
        self.streamreader = streamreader
        return self

    def __repr__(self):
        return "<%s.%s object for encoding %s at 0x%x>" % (
            self.__class__.__module__, self.__class__.__name__,
            self.name, id(self))

class Codec:

    """ Defines the interface for stateless encoders/decoders.

        The .encode()/.decode() methods may use different error
        handling schemes by providing the errors argument. These
        string values are predefined:

         'strict' - raise a ValueError error (or a subclass)
         'ignore' - ignore the character and continue with the next
         'replace' - replace with a suitable replacement character;
                    Python will use the official U+FFFD REPLACEMENT
                    CHARACTER for the builtin Unicode codecs on
                    decoding and '?' on encoding.
         'xmlcharrefreplace' - Replace with the appropriate XML
                               character reference (only for encoding).
         'backslashreplace'  - Replace with backslashed escape sequences
                               (only for encoding).

        The set of allowed values can be extended via register_error.

    """
    def encode(self, input, errors='strict'):

        """ Encodes the object input and returns a tuple (output
            object, length consumed).

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamWriter for codecs which have to keep state in order to
            make encoding efficient.

            The encoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

            This base implementation always raises NotImplementedError;
            subclasses must override it.

        """
        raise NotImplementedError

    def decode(self, input, errors='strict'):

        """ Decodes the object input and returns a tuple (output
            object, length consumed).

            input must be an object which provides the bf_getreadbuf
            buffer slot. Python strings, buffer objects and memory
            mapped files are examples of objects providing this slot.

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamReader for codecs which have to keep state in order to
            make decoding efficient.

            The decoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

            This base implementation always raises NotImplementedError;
            subclasses must override it.

        """
        raise NotImplementedError

class IncrementalEncoder(object):
    """
    An IncrementalEncoder encodes an input in multiple steps. The input can be
    passed piece by piece to the encode() method. The IncrementalEncoder remembers
    the state of the Encoding process between calls to encode().
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalEncoder instance.

        The IncrementalEncoder may use different error handling schemes by
        providing the errors keyword argument. See the docstring of the
        Codec class for a list of possible values.
        """
        self.errors = errors
        self.buffer = ""

    def encode(self, input, final=False):
        """
        Encodes input and returns the resulting object.

        This base implementation always raises NotImplementedError;
        subclasses must override it.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the encoder to the initial state.

        The base implementation does nothing.
        """

    def getstate(self):
        """
        Return the current state of the encoder.

        The base implementation always returns 0.
        """
        return 0

    def setstate(self, state):
        """
        Set the current state of the encoder. state must have been
        returned by getstate().

        The base implementation ignores state.
        """

class BufferedIncrementalEncoder(IncrementalEncoder):
    """
    This subclass of IncrementalEncoder can be used as the baseclass for an
    incremental encoder if the encoder must keep some not yet encoded input
    in a buffer between calls to encode().
    """
    def __init__(self, errors='strict'):
        IncrementalEncoder.__init__(self, errors)
        self.buffer = "" # unencoded input that is kept between calls to encode()

    def _buffer_encode(self, input, errors, final):
        # Overwrite this method in subclasses: It must encode input
        # and return an (output, length consumed) tuple
        raise NotImplementedError

    def encode(self, input, final=False):
        # encode input (taking the buffer into account)
        data = self.buffer + input
        (result, consumed) = self._buffer_encode(data, self.errors, final)
        # keep unencoded input until the next call
        self.buffer = data[consumed:]
        return result

    def reset(self):
        IncrementalEncoder.reset(self)
        self.buffer = ""

    def getstate(self):
        # an empty buffer is reported as 0, the value the base class uses
        return self.buffer or 0

    def setstate(self, state):
        # any false state (such as the 0 from getstate()) means "no buffer"
        self.buffer = state or ""

class IncrementalDecoder(object):
    """
    An IncrementalDecoder decodes an input in multiple steps. The input can be
    passed piece by piece to the decode() method. The IncrementalDecoder
    remembers the state of the decoding process between calls to decode().
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalDecoder instance.

        The IncrementalDecoder may use different error handling schemes by
        providing the errors keyword argument. See the docstring of the
        Codec class for a list of possible values.
        """
        self.errors = errors

    def decode(self, input, final=False):
        """
        Decodes input and returns the resulting object.

        This base implementation always raises NotImplementedError;
        subclasses must override it.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the decoder to the initial state.

        The base implementation does nothing.
        """

    def getstate(self):
        """
        Return the current state of the decoder.

        This must be a (buffered_input, additional_state_info) tuple.
        buffered_input must be a bytes object containing bytes that
        were passed to decode() that have not yet been converted.
        additional_state_info must be a non-negative integer
        representing the state of the decoder WITHOUT yet having
        processed the contents of buffered_input.  In the initial state
        and after reset(), getstate() must return (b"", 0).
        """
        return (b"", 0)

    def setstate(self, state):
        """
        Set the current state of the decoder.

        state must have been returned by getstate().  The effect of
        setstate((b"", 0)) must be equivalent to reset().

        The base implementation ignores state.
        """

class BufferedIncrementalDecoder(IncrementalDecoder):
    """
    This subclass of IncrementalDecoder can be used as the baseclass for an
    incremental decoder if the decoder must be able to handle incomplete byte
    sequences.
    """
    def __init__(self, errors='strict'):
        IncrementalDecoder.__init__(self, errors)
        self.buffer = "" # undecoded input that is kept between calls to decode()

    def _buffer_decode(self, input, errors, final):
        # Overwrite this method in subclasses: It must decode input
        # and return an (output, length consumed) tuple
        raise NotImplementedError

    def decode(self, input, final=False):
        # decode input (taking the buffer into account)
        data = self.buffer + input
        (result, consumed) = self._buffer_decode(data, self.errors, final)
        # keep undecoded input until the next call
        self.buffer = data[consumed:]
        return result

    def reset(self):
        IncrementalDecoder.reset(self)
        self.buffer = ""

    def getstate(self):
        # additional state info is always 0
        return (self.buffer, 0)

    def setstate(self, state):
        # ignore additional state info
        self.buffer = state[0]

#
# The StreamWriter and StreamReader classes provide generic working
# interfaces which can be used to implement new encoding submodules
# very easily. See encodings/utf_8.py for an example on how this is
# done.
318edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# 319edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 320edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass StreamWriter(Codec): 321edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 322edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self, stream, errors='strict'): 323edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 324edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Creates a StreamWriter instance. 325edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 326edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep stream must be a file-like object open for writing 327edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (binary) data. 328edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 329edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep The StreamWriter may use different error handling 330edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep schemes by providing the errors keyword argument. These 331edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep parameters are predefined: 332edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 333edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 'strict' - raise a ValueError (or a subclass) 334edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 'ignore' - ignore the character and continue with the next 335edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 'replace'- replace with a suitable replacement character 336edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 'xmlcharrefreplace' - Replace with the appropriate XML 337edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep character reference. 338edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 'backslashreplace' - Replace with backslashed escape 339edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sequences (only for encoding). 
340edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 341edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep The set of allowed parameter values can be extended via 342edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep register_error. 343edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 344edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream = stream 345edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.errors = errors 346edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 347edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def write(self, object): 348edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 349edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Writes the object's contents encoded to self.stream. 350edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 351edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data, consumed = self.encode(object, self.errors) 352edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream.write(data) 353edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 354edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def writelines(self, list): 355edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 356edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Writes the concatenated list of strings to the stream 357edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep using .write(). 358edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 359edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.write(''.join(list)) 360edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 361edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def reset(self): 362edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 363edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Flushes and resets the codec buffers used for keeping state. 
364edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 365edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Calling this method should ensure that the data on the 366edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep output is put into a clean state, that allows appending 367edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep of new fresh data without having to rescan the whole 368edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep stream to recover state. 369edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 370edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 371edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep pass 372edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 373edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def seek(self, offset, whence=0): 374edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream.seek(offset, whence) 375edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if whence == 0 and offset == 0: 376edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.reset() 377edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 378edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __getattr__(self, name, 379edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep getattr=getattr): 380edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 381edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Inherit all other methods from the underlying stream. 
382edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 383edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return getattr(self.stream, name) 384edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 385edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __enter__(self): 386edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self 387edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 388edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __exit__(self, type, value, tb): 389edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream.close() 390edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 391edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### 392edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 393edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass StreamReader(Codec): 394edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 395edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self, stream, errors='strict'): 396edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 397edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Creates a StreamReader instance. 398edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 399edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep stream must be a file-like object open for reading 400edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep (binary) data. 401edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 402edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep The StreamReader may use different error handling 403edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep schemes by providing the errors keyword argument. 
These 404edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep parameters are predefined: 405edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 406edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 'strict' - raise a ValueError (or a subclass) 407edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 'ignore' - ignore the character and continue with the next 408edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 'replace'- replace with a suitable replacement character; 409edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 410edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep The set of allowed parameter values can be extended via 411edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep register_error. 412edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 413edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream = stream 414edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.errors = errors 415edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.bytebuffer = "" 416edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # For str->str decoding this will stay a str 417edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # For str->unicode decoding the first read will promote it to unicode 418edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.charbuffer = "" 419edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.linebuffer = None 420edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 421edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def decode(self, input, errors='strict'): 422edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise NotImplementedError 423edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 424edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def read(self, size=-1, chars=-1, firstline=False): 425edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 426edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 
Decodes data from the stream self.stream and returns the 427edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep resulting object. 428edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 429edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep chars indicates the number of characters to read from the 430edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep stream. read() will never return more than chars 431edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep characters, but it might return less, if there are not enough 432edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep characters available. 433edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 434edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep size indicates the approximate maximum number of bytes to 435edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep read from the stream for decoding purposes. The decoder 436edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep can modify this setting as appropriate. The default value 437edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep -1 indicates to read and decode as much as possible. size 438edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep is intended to prevent having to decode huge files in one 439edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep step. 440edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 441edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep If firstline is true, and a UnicodeDecodeError happens 442edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep after the first line terminator in the input only the first line 443edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep will be returned, the rest of the input will be kept until the 444edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep next call to read(). 
445edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 446edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep The method should use a greedy read strategy meaning that 447edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep it should read as much data as is allowed within the 448edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep definition of the encoding and the given size, e.g. if 449edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep optional encoding endings or state markers are available 450edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep on the stream, these should be read too. 451edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 452edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # If we have lines cached, first merge them back into characters 453edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if self.linebuffer: 454edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.charbuffer = "".join(self.linebuffer) 455edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.linebuffer = None 456edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 457edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # read until we get the required number of characters (if available) 458edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep while True: 459edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # can the request can be satisfied from the character buffer? 
460edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if chars < 0: 461edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if size < 0: 462edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if self.charbuffer: 463edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep break 464edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep elif len(self.charbuffer) >= size: 465edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep break 466edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 467edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if len(self.charbuffer) >= chars: 468edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep break 469edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # we need more data 470edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if size < 0: 471edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep newdata = self.stream.read() 472edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 473edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep newdata = self.stream.read(size) 474edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # decode bytes (those remaining from the last call included) 475edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data = self.bytebuffer + newdata 476edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep try: 477edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep newchars, decodedbytes = self.decode(data, self.errors) 478edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep except UnicodeDecodeError, exc: 479edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if firstline: 480edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep newchars, decodedbytes = self.decode(data[:exc.start], self.errors) 481edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep lines = newchars.splitlines(True) 482edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if len(lines)<=1: 483edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander 
Stoep raise 484edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 485edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise 486edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # keep undecoded bytes until the next call 487edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.bytebuffer = data[decodedbytes:] 488edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # put new characters in the character buffer 489edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.charbuffer += newchars 490edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # there was no data available 491edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not newdata: 492edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep break 493edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if chars < 0: 494edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Return everything we've got 495edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep result = self.charbuffer 496edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.charbuffer = "" 497edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 498edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Return the first chars characters 499edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep result = self.charbuffer[:chars] 500edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.charbuffer = self.charbuffer[chars:] 501edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return result 502edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 503edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def readline(self, size=None, keepends=True): 504edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 505edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Read one line from the input stream and return the 506edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep decoded data. 
507edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 508edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep size, if given, is passed as size argument to the 509edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep read() method. 510edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 511edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 512edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # If we have lines cached from an earlier read, return 513edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # them unconditionally 514edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if self.linebuffer: 515edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line = self.linebuffer[0] 516edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep del self.linebuffer[0] 517edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if len(self.linebuffer) == 1: 518edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # revert to charbuffer mode; we might need more data 519edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # next time 520edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.charbuffer = self.linebuffer[0] 521edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.linebuffer = None 522edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not keepends: 523edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line = line.splitlines(False)[0] 524edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return line 525edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 526edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep readsize = size or 72 527edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line = "" 528edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # If size is given, we call read() only once 529edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep while True: 530edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data = self.read(readsize, 
firstline=True) 531edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if data: 532edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # If we're at a "\r" read one extra character (which might 533edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # be a "\n") to get a proper line ending. If the stream is 534edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # temporarily exhausted we return the wrong line ending. 535edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if data.endswith("\r"): 536edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data += self.read(size=1, chars=1) 537edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 538edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line += data 539edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep lines = line.splitlines(True) 540edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if lines: 541edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if len(lines) > 1: 542edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # More than one line result; the first line is a full line 543edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # to return 544edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line = lines[0] 545edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep del lines[0] 546edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if len(lines) > 1: 547edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # cache the remaining lines 548edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep lines[-1] += self.charbuffer 549edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.linebuffer = lines 550edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.charbuffer = None 551edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 552edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # only one remaining line, put it back into charbuffer 553edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 
self.charbuffer = lines[0] + self.charbuffer 554edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not keepends: 555edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line = line.splitlines(False)[0] 556edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep break 557edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line0withend = lines[0] 558edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line0withoutend = lines[0].splitlines(False)[0] 559edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if line0withend != line0withoutend: # We really have a line end 560edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Put the rest back together and keep it until the next call 561edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.charbuffer = "".join(lines[1:]) + self.charbuffer 562edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if keepends: 563edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line = line0withend 564edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 565edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line = line0withoutend 566edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep break 567edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # we didn't get anything or this was our only try 568edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if not data or size is not None: 569edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if line and not keepends: 570edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line = line.splitlines(False)[0] 571edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep break 572edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if readsize<8000: 573edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep readsize *= 2 574edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return line 575edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 576edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def 
readlines(self, sizehint=None, keepends=True): 577edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 578edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Read all lines available on the input stream 579edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep and return them as list of lines. 580edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 581edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Line breaks are implemented using the codec's decoder 582edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep method and are included in the list entries. 583edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 584edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sizehint, if given, is ignored since there is no efficient 585edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep way to finding the true end-of-line. 586edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 587edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 588edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data = self.read() 589edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return data.splitlines(keepends) 590edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 591edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def reset(self): 592edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 593edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Resets the codec buffers used for keeping state. 594edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 595edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Note that no stream repositioning should take place. 596edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep This method is primarily intended to be able to recover 597edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep from decoding errors. 
598edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 599edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 600edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.bytebuffer = "" 601edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.charbuffer = u"" 602edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.linebuffer = None 603edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 604edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def seek(self, offset, whence=0): 605edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Set the input stream's current position. 606edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 607edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Resets the codec buffers used for keeping state. 608edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 609edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream.seek(offset, whence) 610edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.reset() 611edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 612edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def next(self): 613edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 614edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Return the next decoded line from the input stream.""" 615edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep line = self.readline() 616edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if line: 617edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return line 618edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep raise StopIteration 619edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 620edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __iter__(self): 621edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self 622edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 623edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def 
__getattr__(self, name, 624edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep getattr=getattr): 625edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 626edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Inherit all other methods from the underlying stream. 627edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 628edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return getattr(self.stream, name) 629edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 630edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __enter__(self): 631edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self 632edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 633edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __exit__(self, type, value, tb): 634edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream.close() 635edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 636edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### 637edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 638edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass StreamReaderWriter: 639edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 640edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ StreamReaderWriter instances allow wrapping streams which 641edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep work in both read and write modes. 642edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 643edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep The design is such that one can use the factory functions 644edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep returned by the codec.lookup() function to construct the 645edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep instance. 
646edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 647edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 648edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Optional attributes set by the file wrappers below 649edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep encoding = 'unknown' 650edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 651edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self, stream, Reader, Writer, errors='strict'): 652edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 653edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Creates a StreamReaderWriter instance. 654edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 655edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep stream must be a Stream-like object. 656edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 657edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Reader, Writer must be factory functions or classes 658edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep providing the StreamReader, StreamWriter interface resp. 659edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 660edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Error handling is done in the same way as defined for the 661edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep StreamWriter/Readers. 
662edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 663edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 664edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream = stream 665edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.reader = Reader(stream, errors) 666edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.writer = Writer(stream, errors) 667edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.errors = errors 668edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 669edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def read(self, size=-1): 670edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 671edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self.reader.read(size) 672edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 673edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def readline(self, size=None): 674edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 675edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self.reader.readline(size) 676edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 677edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def readlines(self, sizehint=None): 678edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 679edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self.reader.readlines(sizehint) 680edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 681edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def next(self): 682edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 683edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Return the next decoded line from the input stream.""" 684edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self.reader.next() 685edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 686edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __iter__(self): 687edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 
return self 688edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 689edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def write(self, data): 690edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 691edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self.writer.write(data) 692edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 693edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def writelines(self, list): 694edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 695edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self.writer.writelines(list) 696edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 697edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def reset(self): 698edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 699edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.reader.reset() 700edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.writer.reset() 701edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 702edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def seek(self, offset, whence=0): 703edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream.seek(offset, whence) 704edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.reader.reset() 705edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if whence == 0 and offset == 0: 706edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.writer.reset() 707edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 708edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __getattr__(self, name, 709edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep getattr=getattr): 710edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 711edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Inherit all other methods from the underlying stream. 
712edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 713edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return getattr(self.stream, name) 714edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 715edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # these are needed to make "with codecs.open(...)" work properly 716edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 717edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __enter__(self): 718edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self 719edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 720edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __exit__(self, type, value, tb): 721edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream.close() 722edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 723edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### 724edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 725edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass StreamRecoder: 726edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 727edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ StreamRecoder instances provide a frontend - backend 728edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep view of encoding data. 729edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 730edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep They use the complete set of APIs returned by the 731edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs.lookup() function to implement their task. 
732edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 733edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Data written to the stream is first decoded into an 734edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep intermediate format (which is dependent on the given codec 735edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep combination) and then written to the stream using an instance 736edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep of the provided Writer class. 737edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 738edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep In the other direction, data is read from the stream using a 739edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Reader instance and then return encoded data to the caller. 740edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 741edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 742edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Optional attributes set by the file wrappers below 743edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data_encoding = 'unknown' 744edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep file_encoding = 'unknown' 745edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 746edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __init__(self, stream, encode, decode, Reader, Writer, 747edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep errors='strict'): 748edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 749edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Creates a StreamRecoder instance which implements a two-way 750edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep conversion: encode and decode work on the frontend (the 751edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep input to .read() and output of .write()) while 752edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Reader and Writer work on the backend (reading and 
753edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep writing to the stream). 754edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 755edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep You can use these objects to do transparent direct 756edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep recodings from e.g. latin-1 to utf-8 and back. 757edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 758edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep stream must be a file-like object. 759edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 760edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep encode, decode must adhere to the Codec interface, Reader, 761edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Writer must be factory functions or classes providing the 762edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep StreamReader, StreamWriter interface resp. 763edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 764edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep encode and decode are needed for the frontend translation, 765edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Reader and Writer for the backend translation. Unicode is 766edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep used as intermediate encoding. 767edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 768edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Error handling is done in the same way as defined for the 769edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep StreamWriter/Readers. 
770edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 771edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 772edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream = stream 773edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.encode = encode 774edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.decode = decode 775edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.reader = Reader(stream, errors) 776edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.writer = Writer(stream, errors) 777edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.errors = errors 778edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 779edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def read(self, size=-1): 780edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 781edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data = self.reader.read(size) 782edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data, bytesencoded = self.encode(data, self.errors) 783edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return data 784edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 785edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def readline(self, size=None): 786edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 787edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if size is None: 788edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data = self.reader.readline() 789edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep else: 790edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data = self.reader.readline(size) 791edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data, bytesencoded = self.encode(data, self.errors) 792edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return data 793edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 794edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def readlines(self, sizehint=None): 
795edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 796edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data = self.reader.read() 797edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data, bytesencoded = self.encode(data, self.errors) 798edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return data.splitlines(1) 799edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 800edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def next(self): 801edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 802edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Return the next decoded line from the input stream.""" 803edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data = self.reader.next() 804edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data, bytesencoded = self.encode(data, self.errors) 805edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return data 806edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 807edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __iter__(self): 808edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self 809edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 810edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def write(self, data): 811edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 812edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data, bytesdecoded = self.decode(data, self.errors) 813edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self.writer.write(data) 814edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 815edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def writelines(self, list): 816edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 817edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data = ''.join(list) 818edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data, bytesdecoded = self.decode(data, self.errors) 819edbb763a2b63074cd468a5d33a17908b2cc0654Jeff 
Vander Stoep return self.writer.write(data) 820edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 821edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def reset(self): 822edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 823edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.reader.reset() 824edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.writer.reset() 825edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 826edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __getattr__(self, name, 827edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep getattr=getattr): 828edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 829edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Inherit all other methods from the underlying stream. 830edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 831edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return getattr(self.stream, name) 832edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 833edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __enter__(self): 834edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return self 835edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 836edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep def __exit__(self, type, value, tb): 837edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep self.stream.close() 838edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 839edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### Shortcuts 840edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 841edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepdef open(filename, mode='rb', encoding=None, errors='strict', buffering=1): 842edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 843edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Open an encoded file using the given mode and return 844edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep a wrapped version 
providing transparent encoding/decoding. 845edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 846edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Note: The wrapped version will only accept the object format 847edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep defined by the codecs, i.e. Unicode objects for most builtin 848edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep codecs. Output is also codec dependent and will usually be 849edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Unicode as well. 850edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 851edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Files are always opened in binary mode, even if no binary mode 852edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep was specified. This is done to avoid data loss due to encodings 853edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep using 8-bit values. The default file mode is 'rb' meaning to 854edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep open the file in binary read mode. 855edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 856edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep encoding specifies the encoding which is to be used for the 857edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep file. 858edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 859edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep errors may be given to define the error handling. It defaults 860edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep to 'strict' which causes ValueErrors to be raised in case an 861edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep encoding error occurs. 862edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 863edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep buffering has the same meaning as for the builtin open() API. 864edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep It defaults to line buffered. 
865edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 866edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep The returned wrapped file object provides an extra attribute 867edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep .encoding which allows querying the used encoding. This 868edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep attribute is only available if an encoding was specified as 869edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep parameter. 870edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 871edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 872edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if encoding is not None: 873edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if 'U' in mode: 874edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # No automatic conversion of '\n' is done on reading and writing 875edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep mode = mode.strip().replace('U', '') 876edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if mode[:1] not in set('rwa'): 877edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep mode = 'r' + mode 878edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if 'b' not in mode: 879edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Force opening of the file in binary mode 880edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep mode = mode + 'b' 881edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep file = __builtin__.open(filename, mode, buffering) 882edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if encoding is None: 883edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return file 884edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep info = lookup(encoding) 885edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors) 886edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Add attributes to simplify 
introspection 887edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep srw.encoding = encoding 888edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return srw 889edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 890edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepdef EncodedFile(file, data_encoding, file_encoding=None, errors='strict'): 891edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 892edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ Return a wrapped version of file which provides transparent 893edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep encoding translation. 894edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 895edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Strings written to the wrapped file are interpreted according 896edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep to the given data_encoding and then written to the original 897edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep file as string using file_encoding. The intermediate encoding 898edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep will usually be Unicode but depends on the specified codecs. 899edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 900edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep Strings are read from the file using file_encoding and then 901edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep passed back to the caller as string using data_encoding. 902edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 903edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep If file_encoding is not given, it defaults to data_encoding. 904edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 905edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep errors may be given to define the error handling. 
It defaults 906edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep to 'strict' which causes ValueErrors to be raised in case an 907edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep encoding error occurs. 908edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 909edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep The returned wrapped file object provides two extra attributes 910edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep .data_encoding and .file_encoding which reflect the given 911edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep parameters of the same name. The attributes can be used for 912edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep introspection by Python programs. 913edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 914edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep """ 915edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep if file_encoding is None: 916edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep file_encoding = data_encoding 917edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep data_info = lookup(data_encoding) 918edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep file_info = lookup(file_encoding) 919edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sr = StreamRecoder(file, data_info.encode, data_info.decode, 920edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep file_info.streamreader, file_info.streamwriter, errors) 921edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep # Add attributes to simplify introspection 922edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sr.data_encoding = data_encoding 923edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep sr.file_encoding = file_encoding 924edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep return sr 925edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep 926edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### Helpers for codec lookup 

def getencoder(encoding):

    """ Look up the codec for the given encoding and return
        its encoder function.

        Raises a LookupError in case the encoding cannot be found.

    """
    return lookup(encoding).encode

def getdecoder(encoding):

    """ Look up the codec for the given encoding and return
        its decoder function.

        Raises a LookupError in case the encoding cannot be found.

    """
    return lookup(encoding).decode

def getincrementalencoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalEncoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental encoder.

    """
    encoder = lookup(encoding).incrementalencoder
    if encoder is None:
        raise LookupError(encoding)
    return encoder

def getincrementaldecoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalDecoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental decoder.

    """
    decoder = lookup(encoding).incrementaldecoder
    if decoder is None:
        raise LookupError(encoding)
    return decoder

def getreader(encoding):

    """ Look up the codec for the given encoding and return
        its StreamReader class or factory function.

        Raises a LookupError in case the encoding cannot be found.

    """
    return lookup(encoding).streamreader

def getwriter(encoding):

    """ Look up the codec for the given encoding and return
        its StreamWriter class or factory function.

        Raises a LookupError in case the encoding cannot be found.

    """
    return lookup(encoding).streamwriter

def iterencode(iterator, encoding, errors='strict', **kwargs):
    """
    Encoding iterator.

    Encodes the input strings from the iterator using an IncrementalEncoder
    and yields every non-empty piece of output.

    errors and kwargs are passed through to the IncrementalEncoder
    constructor.
    """
    encoder = getincrementalencoder(encoding)(errors, **kwargs)
    for chunk in iterator:
        output = encoder.encode(chunk)
        if output:
            yield output
    # Final call with final=True so the encoder can emit pending output.
    output = encoder.encode("", True)
    if output:
        yield output

def iterdecode(iterator, encoding, errors='strict', **kwargs):
    """
    Decoding iterator.

    Decodes the input strings from the iterator using an IncrementalDecoder
    and yields every non-empty piece of output.

    errors and kwargs are passed through to the IncrementalDecoder
    constructor.
    """
    decoder = getincrementaldecoder(encoding)(errors, **kwargs)
    for chunk in iterator:
        output = decoder.decode(chunk)
        if output:
            yield output
    # Final call with final=True so the decoder can emit pending output.
    # b"" is the plain (byte) str on Python 2.6+, i.e. the same value
    # as "" there, and states the intended input type explicitly.
    output = decoder.decode(b"", True)
    if output:
        yield output

### Helpers for charmap-based codecs

def make_identity_dict(rng):

    """ make_identity_dict(rng) -> dict

        Return a dictionary where elements of the rng sequence are
        mapped to themselves.

    """
    res = {}
    for i in rng:
        res[i] = i
    return res

def make_encoding_map(decoding_map):

    """ Creates an encoding map from a decoding map.

        If a target mapping in the decoding map occurs multiple
        times, then that target is mapped to None (undefined mapping),
        causing an exception when encountered by the charmap codec
        during translation.

        One example where this happens is cp875.py which decodes
        multiple characters to \u001a.

    """
    m = {}
    for k, v in decoding_map.items():
        if v not in m:
            m[v] = k
        else:
            # Ambiguous reverse mapping: mark the target as undefined.
            m[v] = None
    return m

### error handlers

try:
    strict_errors = lookup_error("strict")
    ignore_errors = lookup_error("ignore")
    replace_errors = lookup_error("replace")
    xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
    backslashreplace_errors = lookup_error("backslashreplace")
except LookupError:
    # In --disable-unicode builds, these error handlers are missing
    strict_errors = None
    ignore_errors = None
    replace_errors = None
    xmlcharrefreplace_errors = None
    backslashreplace_errors = None

# Tell modulefinder that using codecs probably needs the encodings
# package
_false = 0
if _false:
    import encodings

### Tests

if __name__ == '__main__':

    # Make stdout translate Latin-1 output into UTF-8 output
    sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')

    # Have stdin translate Latin-1 input into UTF-8 input
    sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')
