""" codecs -- Python Codec Registry, API and helpers.


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"

import __builtin__
import sys

### Registry and builtin stateless codec functions

try:
    from _codecs import *
except ImportError as why:
    # "except ... as" is accepted by every interpreter that also accepts
    # the b"" literals used further down in this module.
    raise SystemError('Failed to load the builtin codecs: %s' % why)

__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
           "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
           "strict_errors", "ignore_errors", "replace_errors",
           "xmlcharrefreplace_errors",
           "register_error", "lookup_error"]

### Constants

#
# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
# and its possible byte string values
# for UTF8/UTF16/UTF32 output and little/big endian machines
#

# UTF-8
BOM_UTF8 = '\xef\xbb\xbf'

# UTF-16, little endian
BOM_LE = BOM_UTF16_LE = '\xff\xfe'

# UTF-16, big endian
BOM_BE = BOM_UTF16_BE = '\xfe\xff'

# UTF-32, little endian
BOM_UTF32_LE = '\xff\xfe\x00\x00'

# UTF-32, big endian
BOM_UTF32_BE = '\x00\x00\xfe\xff'

if sys.byteorder == 'little':

    # UTF-16, native endianness
    BOM = BOM_UTF16 = BOM_UTF16_LE

    # UTF-32, native endianness
    BOM_UTF32 = BOM_UTF32_LE

else:

    # UTF-16, native endianness
    BOM = BOM_UTF16 = BOM_UTF16_BE

    # UTF-32, native endianness
    BOM_UTF32 = BOM_UTF32_BE

# Old broken names (don't use in new code)
BOM32_LE = BOM_UTF16_LE
BOM32_BE = BOM_UTF16_BE
BOM64_LE = BOM_UTF32_LE
BOM64_BE = BOM_UTF32_BE


### Codec base classes (defining the API)

class CodecInfo(tuple):

    """ Tuple subclass bundling the callables that make up one codec.

        The tuple itself holds (encode, decode, streamreader,
        streamwriter).  The same four objects, together with name,
        incrementalencoder and incrementaldecoder, are also stored as
        instance attributes.

    """

    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
                incrementalencoder=None, incrementaldecoder=None, name=None):
        """ Builds the 4-tuple and attaches all seven values as
            attributes of the new instance.
        """
        self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
        self.name = name
        self.encode = encode
        self.decode = decode
        self.incrementalencoder = incrementalencoder
        self.incrementaldecoder = incrementaldecoder
        self.streamwriter = streamwriter
        self.streamreader = streamreader
        return self

    def __repr__(self):
        """ Returns a description naming the module, class, encoding
            name and object id (in hex).
        """
        return "<%s.%s object for encoding %s at 0x%x>" % (
            self.__class__.__module__, self.__class__.__name__,
            self.name, id(self))

class Codec:

    """ Defines the interface for stateless encoders/decoders.

        The .encode()/.decode() methods may use different error
        handling schemes by providing the errors argument. These
        string values are predefined:

         'strict' - raise a ValueError error (or a subclass)
         'ignore' - ignore the character and continue with the next
         'replace' - replace with a suitable replacement character;
                    Python will use the official U+FFFD REPLACEMENT
                    CHARACTER for the builtin Unicode codecs on
                    decoding and '?' on encoding.
         'xmlcharrefreplace' - Replace with the appropriate XML
                               character reference (only for encoding).
         'backslashreplace'  - Replace with backslashed escape sequences
                               (only for encoding).

        The set of allowed values can be extended via register_error.

    """
    def encode(self, input, errors='strict'):

        """ Encodes the object input and returns a tuple (output
            object, length consumed).

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamWriter for codecs which have to keep state in order to
            make encoding efficient.

            The encoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

        """
        raise NotImplementedError

    def decode(self, input, errors='strict'):

        """ Decodes the object input and returns a tuple (output
            object, length consumed).

            input must be an object which provides the bf_getreadbuf
            buffer slot. Python strings, buffer objects and memory
            mapped files are examples of objects providing this slot.

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamReader for codecs which have to keep state in order to
            make decoding efficient.

            The decoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

        """
        raise NotImplementedError

class IncrementalEncoder(object):
    """
    An IncrementalEncoder encodes an input in multiple steps. The input can be
    passed piece by piece to the encode() method. The IncrementalEncoder remembers
    the state of the Encoding process between calls to encode().
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalEncoder instance.

        The IncrementalEncoder may use different error handling schemes by
        providing the errors keyword argument. See the Codec class docstring
        for a list of predefined values.
        """
        self.errors = errors
        self.buffer = ""

    def encode(self, input, final=False):
        """
        Encodes input and returns the resulting object.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the encoder to the initial state.
        """

    def getstate(self):
        """
        Return the current state of the encoder.
        """
        return 0

    def setstate(self, state):
        """
        Set the current state of the encoder. state must have been
        returned by getstate().
        """

class BufferedIncrementalEncoder(IncrementalEncoder):
    """
    This subclass of IncrementalEncoder can be used as the baseclass for an
    incremental encoder if the encoder must keep some of the output in a
    buffer between calls to encode().
    """
    def __init__(self, errors='strict'):
        IncrementalEncoder.__init__(self, errors)
        self.buffer = "" # unencoded input that is kept between calls to encode()

    def _buffer_encode(self, input, errors, final):
        # Overwrite this method in subclasses: It must encode input
        # and return an (output, length consumed) tuple
        raise NotImplementedError

    def encode(self, input, final=False):
        """
        Encodes the buffered leftover plus input via _buffer_encode() and
        returns the output; whatever was not consumed stays in self.buffer.
        """
        # encode input (taking the buffer into account)
        data = self.buffer + input
        (result, consumed) = self._buffer_encode(data, self.errors, final)
        # keep unencoded input until the next call
        self.buffer = data[consumed:]
        return result

    def reset(self):
        """
        Resets the encoder and discards any buffered input.
        """
        IncrementalEncoder.reset(self)
        self.buffer = ""

    def getstate(self):
        """
        Returns the buffered input, or 0 when the buffer is empty.
        """
        return self.buffer or 0

    def setstate(self, state):
        """
        Restores the buffer from state; a false state (such as the 0
        returned by getstate() for an empty buffer) yields an empty buffer.
        """
        self.buffer = state or ""

class IncrementalDecoder(object):
    """
    An IncrementalDecoder decodes an input in multiple steps. The input can be
    passed piece by piece to the decode() method. The IncrementalDecoder
    remembers the state of the decoding process between calls to decode().
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalDecoder instance.

        The IncrementalDecoder may use different error handling schemes by
        providing the errors keyword argument. See the Codec class docstring
        for a list of predefined values.
        """
        self.errors = errors

    def decode(self, input, final=False):
        """
        Decodes input and returns the resulting object.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the decoder to the initial state.
        """

    def getstate(self):
        """
        Return the current state of the decoder.

        This must be a (buffered_input, additional_state_info) tuple.
        buffered_input must be a bytes object containing bytes that
        were passed to decode() that have not yet been converted.
        additional_state_info must be a non-negative integer
        representing the state of the decoder WITHOUT yet having
        processed the contents of buffered_input.  In the initial state
        and after reset(), getstate() must return (b"", 0).
        """
        return (b"", 0)

    def setstate(self, state):
        """
        Set the current state of the decoder.

        state must have been returned by getstate().  The effect of
        setstate((b"", 0)) must be equivalent to reset().
        """

class BufferedIncrementalDecoder(IncrementalDecoder):
    """
    This subclass of IncrementalDecoder can be used as the baseclass for an
    incremental decoder if the decoder must be able to handle incomplete byte
    sequences.
    """
    def __init__(self, errors='strict'):
        IncrementalDecoder.__init__(self, errors)
        self.buffer = "" # undecoded input that is kept between calls to decode()

    def _buffer_decode(self, input, errors, final):
        # Overwrite this method in subclasses: It must decode input
        # and return an (output, length consumed) tuple
        raise NotImplementedError

    def decode(self, input, final=False):
        """
        Decodes the buffered leftover plus input via _buffer_decode() and
        returns the output; whatever was not consumed stays in self.buffer.
        """
        # decode input (taking the buffer into account)
        data = self.buffer + input
        (result, consumed) = self._buffer_decode(data, self.errors, final)
        # keep undecoded input until the next call
        self.buffer = data[consumed:]
        return result

    def reset(self):
        """
        Resets the decoder and discards any buffered input.
        """
        IncrementalDecoder.reset(self)
        self.buffer = ""

    def getstate(self):
        """
        Returns (buffered input, 0).
        """
        # additional state info is always 0
        return (self.buffer, 0)

    def setstate(self, state):
        """
        Restores the buffer from the first item of state.
        """
        # ignore additional state info
        self.buffer = state[0]

#
# The StreamWriter and StreamReader class provide generic working
# interfaces which can be used to implement new encoding submodules
# very easily. See encodings/utf_8.py for an example on how this is
# done.
#

class StreamWriter(Codec):

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamWriter instance.

            stream must be a file-like object open for writing
            (binary) data.

            The StreamWriter may use different error handling
            schemes by providing the errors keyword argument. These
            parameters are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character
             'xmlcharrefreplace' - Replace with the appropriate XML
                                   character reference.
             'backslashreplace'  - Replace with backslashed escape
                                   sequences (only for encoding).

            The set of allowed parameter values can be extended via
            register_error.
        """
        self.stream = stream
        self.errors = errors

    def write(self, object):

        """ Writes the object's contents encoded to self.stream.
        """
        # The consumed length reported by encode() is not used here.
        data, consumed = self.encode(object, self.errors)
        self.stream.write(data)

    def writelines(self, list):

        """ Writes the concatenated list of strings to the stream
            using .write().
        """
        self.write(''.join(list))

    def reset(self):

        """ Flushes and resets the codec buffers used for keeping state.

            Calling this method should ensure that the data on the
            output is put into a clean state, that allows appending
            of new fresh data without having to rescan the whole
            stream to recover state.

        """
        pass

    def seek(self, offset, whence=0):

        """ Seeks the underlying stream and, when the target is the
            start of the stream (offset 0 with whence 0), also calls
            reset().
        """
        self.stream.seek(offset, whence)
        if whence == 0 and offset == 0:
            self.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        """ Returns the writer itself for use in a with statement.
        """
        return self

    def __exit__(self, type, value, tb):
        """ Closes the underlying stream when the with block ends.
        """
        self.stream.close()

###

class StreamReader(Codec):

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamReader instance.

            stream must be a file-like object open for reading
            (binary) data.

            The StreamReader may use different error handling
            schemes by providing the errors keyword argument. These
            parameters are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character;

            The set of allowed parameter values can be extended via
            register_error.
        """
        self.stream = stream
        self.errors = errors
        self.bytebuffer = ""
        # For str->str decoding this will stay a str
        # For str->unicode decoding the first read will promote it to unicode
        self.charbuffer = ""
        self.linebuffer = None

    def decode(self, input, errors='strict'):
        raise NotImplementedError

    def read(self, size=-1, chars=-1, firstline=False):

        """ Decodes data from the stream self.stream and returns the
            resulting object.

            chars indicates the number of characters to read from the
            stream. read() will never return more than chars
            characters, but it might return less, if there are not enough
            characters available.
        """
        # NOTE(review): the visible source stops inside this docstring.  The
        # remainder of the docstring and the whole method body lie on the
        # lines that follow this chunk and still have to be restored there.
43383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 43483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh size indicates the approximate maximum number of bytes to 43583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh read from the stream for decoding purposes. The decoder 43683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh can modify this setting as appropriate. The default value 43783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh -1 indicates to read and decode as much as possible. size 43883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh is intended to prevent having to decode huge files in one 43983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh step. 44083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 44183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh If firstline is true, and a UnicodeDecodeError happens 44283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh after the first line terminator in the input only the first line 44383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh will be returned, the rest of the input will be kept until the 44483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh next call to read(). 44583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 44683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh The method should use a greedy read strategy meaning that 44783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh it should read as much data as is allowed within the 44883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh definition of the encoding and the given size, e.g. if 44983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh optional encoding endings or state markers are available 45083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh on the stream, these should be read too. 
45183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 45283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # If we have lines cached, first merge them back into characters 45383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if self.linebuffer: 45483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.charbuffer = "".join(self.linebuffer) 45583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.linebuffer = None 45683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 45783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # read until we get the required number of characters (if available) 45883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh while True: 45983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # can the request can be satisfied from the character buffer? 46083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if chars < 0: 46183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if size < 0: 46283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if self.charbuffer: 46383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh break 46483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh elif len(self.charbuffer) >= size: 46583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh break 46683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 46783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if len(self.charbuffer) >= chars: 46883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh break 46983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # we need more data 47083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if size < 0: 47183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh newdata = self.stream.read() 47283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 47383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh newdata = self.stream.read(size) 47483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # decode bytes (those remaining from the last call included) 47583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 
data = self.bytebuffer + newdata 47683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh try: 47783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh newchars, decodedbytes = self.decode(data, self.errors) 47883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh except UnicodeDecodeError, exc: 47983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if firstline: 48083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh newchars, decodedbytes = self.decode(data[:exc.start], self.errors) 48183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh lines = newchars.splitlines(True) 48283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if len(lines)<=1: 48383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh raise 48483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 48583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh raise 48683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # keep undecoded bytes until the next call 48783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.bytebuffer = data[decodedbytes:] 48883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # put new characters in the character buffer 48983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.charbuffer += newchars 49083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # there was no data available 49183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if not newdata: 49283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh break 49383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if chars < 0: 49483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Return everything we've got 49583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh result = self.charbuffer 49683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.charbuffer = "" 49783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 49883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Return the first chars characters 49983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh result = self.charbuffer[:chars] 
50083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.charbuffer = self.charbuffer[chars:] 50183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return result 50283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 50383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def readline(self, size=None, keepends=True): 50483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 50583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Read one line from the input stream and return the 50683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh decoded data. 50783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 50883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh size, if given, is passed as size argument to the 50983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh read() method. 51083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 51183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 51283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # If we have lines cached from an earlier read, return 51383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # them unconditionally 51483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if self.linebuffer: 51583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line = self.linebuffer[0] 51683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh del self.linebuffer[0] 51783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if len(self.linebuffer) == 1: 51883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # revert to charbuffer mode; we might need more data 51983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # next time 52083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.charbuffer = self.linebuffer[0] 52183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.linebuffer = None 52283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if not keepends: 52383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line = line.splitlines(False)[0] 52483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return 
line 52583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 52683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh readsize = size or 72 52783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line = "" 52883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # If size is given, we call read() only once 52983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh while True: 53083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data = self.read(readsize, firstline=True) 53183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if data: 53283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # If we're at a "\r" read one extra character (which might 53383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # be a "\n") to get a proper line ending. If the stream is 53483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # temporarily exhausted we return the wrong line ending. 53583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if data.endswith("\r"): 53683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data += self.read(size=1, chars=1) 53783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 53883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line += data 53983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh lines = line.splitlines(True) 54083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if lines: 54183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if len(lines) > 1: 54283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # More than one line result; the first line is a full line 54383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # to return 54483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line = lines[0] 54583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh del lines[0] 54683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if len(lines) > 1: 54783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # cache the remaining lines 54883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh lines[-1] += self.charbuffer 
54983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.linebuffer = lines 55083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.charbuffer = None 55183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 55283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # only one remaining line, put it back into charbuffer 55383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.charbuffer = lines[0] + self.charbuffer 55483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if not keepends: 55583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line = line.splitlines(False)[0] 55683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh break 55783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line0withend = lines[0] 55883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line0withoutend = lines[0].splitlines(False)[0] 55983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if line0withend != line0withoutend: # We really have a line end 56083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Put the rest back together and keep it until the next call 56183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.charbuffer = "".join(lines[1:]) + self.charbuffer 56283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if keepends: 56383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line = line0withend 56483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 56583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line = line0withoutend 56683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh break 56783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # we didn't get anything or this was our only try 56883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if not data or size is not None: 56983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if line and not keepends: 57083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line = line.splitlines(False)[0] 57183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh break 
57283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if readsize<8000: 57383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh readsize *= 2 57483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return line 57583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 57683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def readlines(self, sizehint=None, keepends=True): 57783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 57883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Read all lines available on the input stream 57983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh and return them as list of lines. 58083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 58183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Line breaks are implemented using the codec's decoder 58283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh method and are included in the list entries. 58383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 58483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh sizehint, if given, is ignored since there is no efficient 58583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh way to finding the true end-of-line. 58683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 58783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 58883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data = self.read() 58983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return data.splitlines(keepends) 59083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 59183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def reset(self): 59283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 59383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Resets the codec buffers used for keeping state. 59483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 59583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Note that no stream repositioning should take place. 
59683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh This method is primarily intended to be able to recover 59783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh from decoding errors. 59883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 59983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 60083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.bytebuffer = "" 60183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.charbuffer = u"" 60283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.linebuffer = None 60383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 60483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def seek(self, offset, whence=0): 60583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Set the input stream's current position. 60683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 60783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Resets the codec buffers used for keeping state. 60883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 60983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.stream.seek(offset, whence) 61083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.reset() 61183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 61283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def next(self): 61383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 61483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Return the next decoded line from the input stream.""" 61583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh line = self.readline() 61683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if line: 61783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return line 61883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh raise StopIteration 61983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 62083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __iter__(self): 62183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self 
62283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 62383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __getattr__(self, name, 62483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh getattr=getattr): 62583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 62683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Inherit all other methods from the underlying stream. 62783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 62883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return getattr(self.stream, name) 62983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 63083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __enter__(self): 63183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self 63283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 63383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __exit__(self, type, value, tb): 63483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.stream.close() 63583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 63683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh### 63783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 63883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehclass StreamReaderWriter: 63983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 64083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ StreamReaderWriter instances allow wrapping streams which 64183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh work in both read and write modes. 64283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 64383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh The design is such that one can use the factory functions 64483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh returned by the codec.lookup() function to construct the 64583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh instance. 
64683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 64783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 64883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Optional attributes set by the file wrappers below 64983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh encoding = 'unknown' 65083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 65183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __init__(self, stream, Reader, Writer, errors='strict'): 65283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 65383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Creates a StreamReaderWriter instance. 65483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 65583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh stream must be a Stream-like object. 65683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 65783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Reader, Writer must be factory functions or classes 65883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh providing the StreamReader, StreamWriter interface resp. 65983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 66083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Error handling is done in the same way as defined for the 66183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh StreamWriter/Readers. 
66283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 66383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 66483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.stream = stream 66583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.reader = Reader(stream, errors) 66683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.writer = Writer(stream, errors) 66783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.errors = errors 66883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 66983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def read(self, size=-1): 67083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 67183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self.reader.read(size) 67283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 67383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def readline(self, size=None): 67483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 67583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self.reader.readline(size) 67683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 67783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def readlines(self, sizehint=None): 67883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 67983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self.reader.readlines(sizehint) 68083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 68183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def next(self): 68283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 68383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Return the next decoded line from the input stream.""" 68483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self.reader.next() 68583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 68683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __iter__(self): 68783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self 68883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 
68983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def write(self, data): 69083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 69183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self.writer.write(data) 69283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 69383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def writelines(self, list): 69483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 69583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self.writer.writelines(list) 69683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 69783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def reset(self): 69883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 69983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.reader.reset() 70083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.writer.reset() 70183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 70283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def seek(self, offset, whence=0): 70383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.stream.seek(offset, whence) 70483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.reader.reset() 70583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if whence == 0 and offset == 0: 70683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.writer.reset() 70783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 70883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __getattr__(self, name, 70983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh getattr=getattr): 71083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 71183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Inherit all other methods from the underlying stream. 
71283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 71383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return getattr(self.stream, name) 71483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 71583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # these are needed to make "with codecs.open(...)" work properly 71683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 71783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __enter__(self): 71883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self 71983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 72083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __exit__(self, type, value, tb): 72183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.stream.close() 72283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 72383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh### 72483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 72583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehclass StreamRecoder: 72683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 72783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ StreamRecoder instances provide a frontend - backend 72883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh view of encoding data. 72983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 73083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh They use the complete set of APIs returned by the 73183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh codecs.lookup() function to implement their task. 73283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 73383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Data written to the stream is first decoded into an 73483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh intermediate format (which is dependent on the given codec 73583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh combination) and then written to the stream using an instance 73683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh of the provided Writer class. 
73783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 73883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh In the other direction, data is read from the stream using a 73983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Reader instance and then return encoded data to the caller. 74083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 74183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 74283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Optional attributes set by the file wrappers below 74383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data_encoding = 'unknown' 74483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh file_encoding = 'unknown' 74583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 74683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __init__(self, stream, encode, decode, Reader, Writer, 74783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh errors='strict'): 74883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 74983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Creates a StreamRecoder instance which implements a two-way 75083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh conversion: encode and decode work on the frontend (the 75183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh input to .read() and output of .write()) while 75283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Reader and Writer work on the backend (reading and 75383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh writing to the stream). 75483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 75583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh You can use these objects to do transparent direct 75683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh recodings from e.g. latin-1 to utf-8 and back. 75783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 75883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh stream must be a file-like object. 
75983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 76083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh encode, decode must adhere to the Codec interface, Reader, 76183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Writer must be factory functions or classes providing the 76283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh StreamReader, StreamWriter interface resp. 76383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 76483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh encode and decode are needed for the frontend translation, 76583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Reader and Writer for the backend translation. Unicode is 76683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh used as intermediate encoding. 76783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 76883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Error handling is done in the same way as defined for the 76983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh StreamWriter/Readers. 
77083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 77183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 77283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.stream = stream 77383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.encode = encode 77483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.decode = decode 77583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.reader = Reader(stream, errors) 77683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.writer = Writer(stream, errors) 77783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.errors = errors 77883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 77983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def read(self, size=-1): 78083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 78183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data = self.reader.read(size) 78283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data, bytesencoded = self.encode(data, self.errors) 78383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return data 78483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 78583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def readline(self, size=None): 78683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 78783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if size is None: 78883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data = self.reader.readline() 78983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 79083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data = self.reader.readline(size) 79183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data, bytesencoded = self.encode(data, self.errors) 79283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return data 79383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 79483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def readlines(self, sizehint=None): 79583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 
79683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data = self.reader.read() 79783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data, bytesencoded = self.encode(data, self.errors) 79883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return data.splitlines(1) 79983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 80083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def next(self): 80183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 80283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Return the next decoded line from the input stream.""" 80383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data = self.reader.next() 80483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data, bytesencoded = self.encode(data, self.errors) 80583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return data 80683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 80783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __iter__(self): 80883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self 80983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 81083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def write(self, data): 81183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 81283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data, bytesdecoded = self.decode(data, self.errors) 81383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self.writer.write(data) 81483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 81583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def writelines(self, list): 81683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 81783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data = ''.join(list) 81883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data, bytesdecoded = self.decode(data, self.errors) 81983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self.writer.write(data) 82083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 82183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def 
reset(self): 82283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 82383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.reader.reset() 82483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.writer.reset() 82583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 82683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __getattr__(self, name, 82783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh getattr=getattr): 82883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 82983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Inherit all other methods from the underlying stream. 83083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 83183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return getattr(self.stream, name) 83283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 83383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __enter__(self): 83483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return self 83583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 83683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def __exit__(self, type, value, tb): 83783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh self.stream.close() 83883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 83983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh### Shortcuts 84083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 84183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef open(filename, mode='rb', encoding=None, errors='strict', buffering=1): 84283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 84383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Open an encoded file using the given mode and return 84483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh a wrapped version providing transparent encoding/decoding. 
84583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 84683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Note: The wrapped version will only accept the object format 84783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh defined by the codecs, i.e. Unicode objects for most builtin 84883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh codecs. Output is also codec dependent and will usually be 84983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Unicode as well. 85083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 85183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Files are always opened in binary mode, even if no binary mode 85283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh was specified. This is done to avoid data loss due to encodings 85383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh using 8-bit values. The default file mode is 'rb' meaning to 85483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh open the file in binary read mode. 85583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 85683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh encoding specifies the encoding which is to be used for the 85783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh file. 85883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 85983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh errors may be given to define the error handling. It defaults 86083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh to 'strict' which causes ValueErrors to be raised in case an 86183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh encoding error occurs. 86283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 86383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh buffering has the same meaning as for the builtin open() API. 86483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh It defaults to line buffered. 
86583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 86683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh The returned wrapped file object provides an extra attribute 86783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh .encoding which allows querying the used encoding. This 86883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh attribute is only available if an encoding was specified as 86983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh parameter. 87083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 87183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 87283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if encoding is not None: 87383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if 'U' in mode: 87483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # No automatic conversion of '\n' is done on reading and writing 87583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh mode = mode.strip().replace('U', '') 87683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if mode[:1] not in set('rwa'): 87783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh mode = 'r' + mode 87883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if 'b' not in mode: 87983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Force opening of the file in binary mode 88083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh mode = mode + 'b' 88183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh file = __builtin__.open(filename, mode, buffering) 88283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if encoding is None: 88383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return file 88483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh info = lookup(encoding) 88583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors) 88683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Add attributes to simplify introspection 88783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh srw.encoding = 
encoding 88883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return srw 88983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 89083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef EncodedFile(file, data_encoding, file_encoding=None, errors='strict'): 89183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 89283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Return a wrapped version of file which provides transparent 89383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh encoding translation. 89483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 89583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Strings written to the wrapped file are interpreted according 89683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh to the given data_encoding and then written to the original 89783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh file as string using file_encoding. The intermediate encoding 89883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh will usually be Unicode but depends on the specified codecs. 89983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 90083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Strings are read from the file using file_encoding and then 90183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh passed back to the caller as string using data_encoding. 90283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 90383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh If file_encoding is not given, it defaults to data_encoding. 90483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 90583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh errors may be given to define the error handling. It defaults 90683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh to 'strict' which causes ValueErrors to be raised in case an 90783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh encoding error occurs. 
90883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 90983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh The returned wrapped file object provides two extra attributes 91083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh .data_encoding and .file_encoding which reflect the given 91183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh parameters of the same name. The attributes can be used for 91283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh introspection by Python programs. 91383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 91483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 91583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if file_encoding is None: 91683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh file_encoding = data_encoding 91783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh data_info = lookup(data_encoding) 91883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh file_info = lookup(file_encoding) 91983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh sr = StreamRecoder(file, data_info.encode, data_info.decode, 92083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh file_info.streamreader, file_info.streamwriter, errors) 92183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # Add attributes to simplify introspection 92283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh sr.data_encoding = data_encoding 92383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh sr.file_encoding = file_encoding 92483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return sr 92583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 92683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh### Helpers for codec lookup 92783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 92883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef getencoder(encoding): 92983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 93083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Lookup up the codec for the given encoding and return 
93183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh its encoder function. 93283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 93383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Raises a LookupError in case the encoding cannot be found. 93483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 93583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 93683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return lookup(encoding).encode 93783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 93883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef getdecoder(encoding): 93983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 94083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Lookup up the codec for the given encoding and return 94183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh its decoder function. 94283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 94383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Raises a LookupError in case the encoding cannot be found. 94483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 94583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 94683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return lookup(encoding).decode 94783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 94883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef getincrementalencoder(encoding): 94983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 95083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Lookup up the codec for the given encoding and return 95183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh its IncrementalEncoder class or factory function. 95283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 95383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Raises a LookupError in case the encoding cannot be found 95483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh or the codecs doesn't provide an incremental encoder. 
95583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 95683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 95783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh encoder = lookup(encoding).incrementalencoder 95883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if encoder is None: 95983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh raise LookupError(encoding) 96083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return encoder 96183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 96283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef getincrementaldecoder(encoding): 96383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 96483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Lookup up the codec for the given encoding and return 96583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh its IncrementalDecoder class or factory function. 96683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 96783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Raises a LookupError in case the encoding cannot be found 96883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh or the codecs doesn't provide an incremental decoder. 
96983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 97083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 97183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh decoder = lookup(encoding).incrementaldecoder 97283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if decoder is None: 97383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh raise LookupError(encoding) 97483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return decoder 97583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 97683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef getreader(encoding): 97783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 97883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Lookup up the codec for the given encoding and return 97983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh its StreamReader class or factory function. 98083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 98183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Raises a LookupError in case the encoding cannot be found. 98283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 98383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 98483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return lookup(encoding).streamreader 98583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 98683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef getwriter(encoding): 98783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 98883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Lookup up the codec for the given encoding and return 98983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh its StreamWriter class or factory function. 99083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 99183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Raises a LookupError in case the encoding cannot be found. 
99283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 99383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 99483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return lookup(encoding).streamwriter 99583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 99683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef iterencode(iterator, encoding, errors='strict', **kwargs): 99783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 99883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Encoding iterator. 99983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 100083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Encodes the input strings from the iterator using a IncrementalEncoder. 100183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 100283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh errors and kwargs are passed through to the IncrementalEncoder 100383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh constructor. 100483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 100583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh encoder = getincrementalencoder(encoding)(errors, **kwargs) 100683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for input in iterator: 100783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh output = encoder.encode(input) 100883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if output: 100983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield output 101083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh output = encoder.encode("", True) 101183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if output: 101283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield output 101383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 101483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef iterdecode(iterator, encoding, errors='strict', **kwargs): 101583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 101683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Decoding iterator. 
101783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 101883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Decodes the input strings from the iterator using a IncrementalDecoder. 101983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 102083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh errors and kwargs are passed through to the IncrementalDecoder 102183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh constructor. 102283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 102383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh decoder = getincrementaldecoder(encoding)(errors, **kwargs) 102483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for input in iterator: 102583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh output = decoder.decode(input) 102683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if output: 102783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield output 102883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh output = decoder.decode("", True) 102983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if output: 103083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield output 103183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 103283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh### Helpers for charmap-based codecs 103383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 103483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef make_identity_dict(rng): 103583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 103683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ make_identity_dict(rng) -> dict 103783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 103883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh Return a dictionary where elements of the rng sequence are 103983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh mapped to themselves. 
104083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 104183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 104283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh res = {} 104383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for i in rng: 104483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh res[i]=i 104583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return res 104683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 104783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef make_encoding_map(decoding_map): 104883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 104983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ Creates an encoding map from a decoding map. 105083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 105183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh If a target mapping in the decoding map occurs multiple 105283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh times, then that target is mapped to None (undefined mapping), 105383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh causing an exception when encountered by the charmap codec 105483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh during translation. 105583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 105683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh One example where this happens is cp875.py which decodes 105783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh multiple character to \u001a. 
105883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 105983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh """ 106083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh m = {} 106183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for k,v in decoding_map.items(): 106283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if not v in m: 106383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh m[v] = k 106483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 106583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh m[v] = None 106683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return m 106783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 106883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh### error handlers 106983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 107083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehtry: 107183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh strict_errors = lookup_error("strict") 107283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh ignore_errors = lookup_error("ignore") 107383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh replace_errors = lookup_error("replace") 107483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") 107583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh backslashreplace_errors = lookup_error("backslashreplace") 107683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehexcept LookupError: 107783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # In --disable-unicode builds, these error handler are missing 107883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh strict_errors = None 107983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh ignore_errors = None 108083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh replace_errors = None 108183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh xmlcharrefreplace_errors = None 108283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh backslashreplace_errors = None 

# Tell modulefinder that using codecs probably needs the encodings
# package
# The import below never executes because _false is 0; it is only here
# so that an "import encodings" statement appears in this module.
# NOTE(review): a variable is presumably used instead of a literal 0 so
# the statement is not dropped at compile time -- confirm before changing.
_false = 0
if _false:
    import encodings

### Tests

# Small demo, run only when the module is executed as a script: it
# replaces sys.stdout and sys.stdin with EncodedFile wrappers.
if __name__ == '__main__':

    # Make stdout translate Latin-1 output into UTF-8 output
    sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')

    # Have stdin translate Latin-1 input into UTF-8 input
    sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')