""" codecs -- Python Codec Registry, API and helpers.


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
9edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
10edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepimport __builtin__, sys
11edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
### Registry and builtin stateless codec functions

# Everything public in the builtin _codecs module is re-exported from
# this module's namespace.
try:
    from _codecs import *
except ImportError as why:
    # Report a missing builtin module as SystemError while keeping the
    # text of the original import failure in the message.
    raise SystemError('Failed to load the builtin codecs: %s' % why)
18edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
# Names exported by "from codecs import *".
__all__ = [
    "register", "lookup", "open", "EncodedFile",
    "BOM", "BOM_BE", "BOM_LE",
    "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
    "BOM_UTF8",
    "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
    "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
    "strict_errors", "ignore_errors", "replace_errors",
    "xmlcharrefreplace_errors",
    "register_error", "lookup_error",
]
26edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
### Constants

#
# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
# and its possible byte string values
# for UTF8/UTF16/UTF32 output and little/big endian machines
#
# The values are written as b'...' literals: on Python 2 that is the
# same object type as a plain string literal, and it keeps the marks
# byte strings wherever str and bytes differ.
#

# UTF-8
BOM_UTF8 = b'\xef\xbb\xbf'

# UTF-16, little endian
BOM_LE = BOM_UTF16_LE = b'\xff\xfe'

# UTF-16, big endian
BOM_BE = BOM_UTF16_BE = b'\xfe\xff'

# UTF-32, little endian
BOM_UTF32_LE = b'\xff\xfe\x00\x00'

# UTF-32, big endian
BOM_UTF32_BE = b'\x00\x00\xfe\xff'

# Pick the native-endian aliases from the byte order reported by sys.
if sys.byteorder == 'little':

    # UTF-16, native endianness
    BOM = BOM_UTF16 = BOM_UTF16_LE

    # UTF-32, native endianness
    BOM_UTF32 = BOM_UTF32_LE

else:

    # UTF-16, native endianness
    BOM = BOM_UTF16 = BOM_UTF16_BE

    # UTF-32, native endianness
    BOM_UTF32 = BOM_UTF32_BE

# Old broken names (don't use in new code).
# Despite the 32/64 in the names, these alias the UTF-16 and UTF-32
# marks respectively.
BOM32_LE = BOM_UTF16_LE
BOM32_BE = BOM_UTF16_BE
BOM64_LE = BOM_UTF32_LE
BOM64_BE = BOM_UTF32_BE
71edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
72edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
### Codec base classes (defining the API)
74edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class CodecInfo(tuple):

    """ Bundles the components that make up one codec.

        The tuple itself holds (encode, decode, streamreader,
        streamwriter). The same four objects, together with
        incrementalencoder, incrementaldecoder and name, are also
        available as attributes of the instance.

    """

    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
        incrementalencoder=None, incrementaldecoder=None, name=None):
        """ Creates the 4-tuple and attaches all seven arguments as
            attributes of the same names.
        """
        self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
        self.name = name
        self.encode = encode
        self.decode = decode
        self.incrementalencoder = incrementalencoder
        self.incrementaldecoder = incrementaldecoder
        self.streamwriter = streamwriter
        self.streamreader = streamreader
        return self

    def __repr__(self):
        """ Returns a description naming the class (with its module),
            the encoding name and the object's id in hexadecimal.
        """
        klass = self.__class__
        return "<%s.%s object for encoding %s at 0x%x>" % (
            klass.__module__, klass.__name__, self.name, id(self))
91edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class Codec:

    """ Defines the interface for stateless encoders/decoders.

        The .encode()/.decode() methods may use different error
        handling schemes by providing the errors argument. These
        string values are predefined:

         'strict' - raise a ValueError (or a subclass)
         'ignore' - ignore the character and continue with the next
         'replace' - replace with a suitable replacement character;
                    Python will use the official U+FFFD REPLACEMENT
                    CHARACTER for the builtin Unicode codecs on
                    decoding and '?' on encoding.
         'xmlcharrefreplace' - Replace with the appropriate XML
                               character reference (only for encoding).
         'backslashreplace'  - Replace with backslashed escape sequences
                               (only for encoding).

        The set of allowed values can be extended via register_error.

    """
    def encode(self, input, errors='strict'):

        """ Encodes the object input and returns a tuple (output
            object, length consumed).

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamWriter for codecs which have to keep state in order to
            make encoding efficient.

            The encoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

            This base implementation only raises NotImplementedError;
            subclasses have to provide the actual encoder.

        """
        raise NotImplementedError

    def decode(self, input, errors='strict'):

        """ Decodes the object input and returns a tuple (output
            object, length consumed).

            input must be an object which provides the bf_getreadbuf
            buffer slot. Python strings, buffer objects and memory
            mapped files are examples of objects providing this slot.

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamReader for codecs which have to keep state in order to
            make decoding efficient.

            The decoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

            This base implementation only raises NotImplementedError;
            subclasses have to provide the actual decoder.

        """
        raise NotImplementedError
155edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class IncrementalEncoder(object):
    """
    An IncrementalEncoder encodes an input in multiple steps. The input can be
    passed piece by piece to the encode() method. The IncrementalEncoder
    remembers the state of the encoding process between calls to encode().
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalEncoder instance.

        The IncrementalEncoder may use different error handling schemes by
        providing the errors keyword argument. See the docstring of the
        Codec class for a list of possible values.
        """
        self.errors = errors
        self.buffer = ""

    def encode(self, input, final=False):
        """
        Encodes input and returns the resulting object.

        This base implementation only raises NotImplementedError.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the encoder to the initial state.

        The base implementation does nothing.
        """

    def getstate(self):
        """
        Return the current state of the encoder.

        The base implementation always returns 0.
        """
        return 0

    def setstate(self, state):
        """
        Set the current state of the encoder. state must have been
        returned by getstate().

        The base implementation ignores state.
        """
195edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class BufferedIncrementalEncoder(IncrementalEncoder):
    """
    This subclass of IncrementalEncoder can be used as the baseclass for an
    incremental encoder if the encoder must keep some of the not yet encoded
    input in a buffer between calls to encode().
    """
    def __init__(self, errors='strict'):
        IncrementalEncoder.__init__(self, errors)
        self.buffer = "" # unencoded input that is kept between calls to encode()

    def _buffer_encode(self, input, errors, final):
        # Override this method in subclasses: It must encode input
        # and return an (output, length consumed) tuple
        raise NotImplementedError

    def encode(self, input, final=False):
        """
        Encodes the buffered leftover plus input via _buffer_encode() and
        returns the encoded result; whatever _buffer_encode() reports as
        not consumed is kept for the next call.
        """
        # encode input (taking the buffer into account)
        data = self.buffer + input
        (result, consumed) = self._buffer_encode(data, self.errors, final)
        # keep unencoded input until the next call
        self.buffer = data[consumed:]
        return result

    def reset(self):
        """
        Resets the encoder and discards any buffered input.
        """
        IncrementalEncoder.reset(self)
        self.buffer = ""

    def getstate(self):
        """
        Returns the buffered input, or 0 when the buffer is empty.
        """
        return self.buffer or 0

    def setstate(self, state):
        """
        Restores the buffer from state; a false state (such as the 0
        returned by getstate()) yields an empty buffer.
        """
        self.buffer = state or ""
228edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class IncrementalDecoder(object):
    """
    An IncrementalDecoder decodes an input in multiple steps. The input can be
    passed piece by piece to the decode() method. The IncrementalDecoder
    remembers the state of the decoding process between calls to decode().
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalDecoder instance.

        The IncrementalDecoder may use different error handling schemes by
        providing the errors keyword argument. See the docstring of the
        Codec class for a list of possible values.
        """
        self.errors = errors

    def decode(self, input, final=False):
        """
        Decodes input and returns the resulting object.

        This base implementation only raises NotImplementedError.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the decoder to the initial state.

        The base implementation does nothing.
        """

    def getstate(self):
        """
        Return the current state of the decoder.

        This must be a (buffered_input, additional_state_info) tuple.
        buffered_input must be a bytes object containing bytes that
        were passed to decode() that have not yet been converted.
        additional_state_info must be a non-negative integer
        representing the state of the decoder WITHOUT yet having
        processed the contents of buffered_input.  In the initial state
        and after reset(), getstate() must return (b"", 0).

        The base implementation always returns (b"", 0).
        """
        return (b"", 0)

    def setstate(self, state):
        """
        Set the current state of the decoder.

        state must have been returned by getstate().  The effect of
        setstate((b"", 0)) must be equivalent to reset().

        The base implementation ignores state.
        """
277edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class BufferedIncrementalDecoder(IncrementalDecoder):
    """
    This subclass of IncrementalDecoder can be used as the baseclass for an
    incremental decoder if the decoder must be able to handle incomplete byte
    sequences.
    """
    def __init__(self, errors='strict'):
        IncrementalDecoder.__init__(self, errors)
        # undecoded input that is kept between calls to decode(); a bytes
        # literal so that getstate() starts out as (b"", 0)
        self.buffer = b""

    def _buffer_decode(self, input, errors, final):
        # Override this method in subclasses: It must decode input
        # and return an (output, length consumed) tuple
        raise NotImplementedError

    def decode(self, input, final=False):
        """
        Decodes the buffered leftover plus input via _buffer_decode() and
        returns the decoded result; whatever _buffer_decode() reports as
        not consumed is kept for the next call.
        """
        # decode input (taking the buffer into account)
        data = self.buffer + input
        (result, consumed) = self._buffer_decode(data, self.errors, final)
        # keep undecoded input until the next call
        self.buffer = data[consumed:]
        return result

    def reset(self):
        """
        Resets the decoder and discards any buffered input.
        """
        IncrementalDecoder.reset(self)
        self.buffer = b""

    def getstate(self):
        """
        Returns (buffered input, 0).
        """
        # additional state info is always 0
        return (self.buffer, 0)

    def setstate(self, state):
        """
        Restores the buffer from the first item of state.
        """
        # ignore additional state info
        self.buffer = state[0]
312edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
#
# The StreamWriter and StreamReader classes provide generic working
# interfaces which can be used to implement new encoding submodules
# very easily. See encodings/utf_8.py for an example of how this is
# done.
#
319edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
320edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepclass StreamWriter(Codec):
321edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def __init__(self, stream, errors='strict'):

    """ Creates a StreamWriter instance.

        stream must be a file-like object open for writing
        (binary) data.

        The StreamWriter may use different error handling
        schemes by providing the errors keyword argument. These
        parameters are predefined:

         'strict' - raise a ValueError (or a subclass)
         'ignore' - ignore the character and continue with the next
         'replace' - replace with a suitable replacement character
         'xmlcharrefreplace' - Replace with the appropriate XML
                               character reference.
         'backslashreplace'  - Replace with backslashed escape
                               sequences (only for encoding).

        The set of allowed parameter values can be extended via
        register_error.

        Both arguments are stored unchanged as self.stream and
        self.errors.
    """
    self.stream = stream
    self.errors = errors
346edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def write(self, object):

    """ Writes the object's contents encoded to self.stream.

        The object is passed to self.encode() together with
        self.errors; only the encoded output is written.
    """
    # encode() returns (output, length consumed); the length is not needed
    data, _consumed = self.encode(object, self.errors)
    self.stream.write(data)
353edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
354edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def writelines(self, list):
355edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
356edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """ Writes the concatenated list of strings to the stream
357edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            using .write().
358edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """
359edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.write(''.join(list))
360edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
361edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def reset(self):
362edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
363edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """ Flushes and resets the codec buffers used for keeping state.
364edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
365edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            Calling this method should ensure that the data on the
366edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            output is put into a clean state, that allows appending
367edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            of new fresh data without having to rescan the whole
368edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            stream to recover state.
369edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
370edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """
371edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        pass
372edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
373edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def seek(self, offset, whence=0):
374edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.stream.seek(offset, whence)
375edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        if whence == 0 and offset == 0:
376edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep            self.reset()
377edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
378edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def __getattr__(self, name,
379edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep                    getattr=getattr):
380edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
381edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """ Inherit all other methods from the underlying stream.
382edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        """
383edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        return getattr(self.stream, name)
384edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
385edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def __enter__(self):
386edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        return self
387edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
388edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    def __exit__(self, type, value, tb):
389edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep        self.stream.close()
390edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
391edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep###
392edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class StreamReader(Codec):

    """ Base class for stream readers: wraps a readable byte stream and
        decodes what is read from it with the codec's decode() method.

        Subclasses have to provide decode(). Attributes not defined
        here are looked up on the underlying stream.
    """

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamReader instance.

            stream must be a file-like object open for reading
            (binary) data.

            The StreamReader may use different error handling
            schemes by providing the errors keyword argument. These
            parameters are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character;

            The set of allowed parameter values can be extended via
            register_error.
        """
        self.stream = stream
        self.errors = errors
        # Bytes read from the stream that decode() has not consumed yet
        self.bytebuffer = ""
        # For str->str decoding this will stay a str
        # For str->unicode decoding the first read will promote it to unicode
        self.charbuffer = ""
        # List of already split lines cached by readline(), or None when
        # the decoded data lives in charbuffer
        self.linebuffer = None

    def decode(self, input, errors='strict'):

        """ Decodes input and returns a tuple (decoded object, number
            of input bytes consumed).

            Has to be overridden by the codec; this base implementation
            raises NotImplementedError.
        """
        raise NotImplementedError

    def read(self, size=-1, chars=-1, firstline=False):

        """ Decodes data from the stream self.stream and returns the
            resulting object.

            chars indicates the number of characters to read from the
            stream. read() will never return more than chars
            characters, but it might return less, if there are not enough
            characters available.

            size indicates the approximate maximum number of bytes to
            read from the stream for decoding purposes. The decoder
            can modify this setting as appropriate. The default value
            -1 indicates to read and decode as much as possible.  size
            is intended to prevent having to decode huge files in one
            step.

            If neither chars nor size is given, the stream is read
            until it returns no more data, even when decoded data is
            still buffered from an earlier readline() call.

            If firstline is true, and a UnicodeDecodeError happens
            after the first line terminator in the input only the first line
            will be returned, the rest of the input will be kept until the
            next call to read().

            The method should use a greedy read strategy meaning that
            it should read as much data as is allowed within the
            definition of the encoding and the given size, e.g.  if
            optional encoding endings or state markers are available
            on the stream, these should be read too.
        """
        # If we have lines cached, first merge them back into characters
        if self.linebuffer:
            self.charbuffer = "".join(self.linebuffer)
            self.linebuffer = None

        # read until we get the required number of characters (if available)
        while True:
            # Can the request be satisfied from the character buffer?
            # Only an explicit chars or size limit allows stopping early.
            # An unlimited read() must not stop just because readline()
            # left something buffered, or the rest of the stream would
            # silently be dropped from the result.
            if chars >= 0:
                if len(self.charbuffer) >= chars:
                    break
            elif size >= 0:
                if len(self.charbuffer) >= size:
                    break
            # we need more data
            if size < 0:
                newdata = self.stream.read()
            else:
                newdata = self.stream.read(size)
            # decode bytes (those remaining from the last call included)
            data = self.bytebuffer + newdata
            try:
                newchars, decodedbytes = self.decode(data, self.errors)
            except UnicodeDecodeError as exc:
                if firstline:
                    # Retry with the bytes in front of the error; this is
                    # only good enough if they contain a complete line
                    newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
                    lines = newchars.splitlines(True)
                    if len(lines)<=1:
                        raise
                else:
                    raise
            # keep undecoded bytes until the next call
            self.bytebuffer = data[decodedbytes:]
            # put new characters in the character buffer
            self.charbuffer += newchars
            # there was no data available
            if not newdata:
                break
        if chars < 0:
            # Return everything we've got
            result = self.charbuffer
            self.charbuffer = ""
        else:
            # Return the first chars characters
            result = self.charbuffer[:chars]
            self.charbuffer = self.charbuffer[chars:]
        return result

    def readline(self, size=None, keepends=True):

        """ Read one line from the input stream and return the
            decoded data.

            size, if given, is passed as size argument to the
            read() method. In that case read() is called only once,
            so the result may be an incomplete line.

            If keepends is false the line ending is stripped from the
            returned line.

        """
        # If we have lines cached from an earlier read, return
        # them unconditionally
        if self.linebuffer:
            line = self.linebuffer[0]
            del self.linebuffer[0]
            if len(self.linebuffer) == 1:
                # revert to charbuffer mode; we might need more data
                # next time
                self.charbuffer = self.linebuffer[0]
                self.linebuffer = None
            if not keepends:
                line = line.splitlines(False)[0]
            return line

        # Without an explicit size start small; the chunk size is doubled
        # on every further attempt (see the end of the loop)
        readsize = size or 72
        line = ""
        # If size is given, we call read() only once
        while True:
            data = self.read(readsize, firstline=True)
            if data:
                # If we're at a "\r" read one extra character (which might
                # be a "\n") to get a proper line ending. If the stream is
                # temporarily exhausted we return the wrong line ending.
                if data.endswith("\r"):
                    data += self.read(size=1, chars=1)

            line += data
            lines = line.splitlines(True)
            if lines:
                if len(lines) > 1:
                    # More than one line result; the first line is a full line
                    # to return
                    line = lines[0]
                    del lines[0]
                    if len(lines) > 1:
                        # cache the remaining lines
                        lines[-1] += self.charbuffer
                        self.linebuffer = lines
                        self.charbuffer = None
                    else:
                        # only one remaining line, put it back into charbuffer
                        self.charbuffer = lines[0] + self.charbuffer
                    if not keepends:
                        line = line.splitlines(False)[0]
                    break
                line0withend = lines[0]
                line0withoutend = lines[0].splitlines(False)[0]
                if line0withend != line0withoutend: # We really have a line end
                    # Put the rest back together and keep it until the next call
                    self.charbuffer = "".join(lines[1:]) + self.charbuffer
                    if keepends:
                        line = line0withend
                    else:
                        line = line0withoutend
                    break
            # we didn't get anything or this was our only try
            if not data or size is not None:
                if line and not keepends:
                    line = line.splitlines(False)[0]
                break
            if readsize<8000:
                readsize *= 2
        return line

    def readlines(self, sizehint=None, keepends=True):

        """ Read all lines available on the input stream
            and return them as list of lines.

            Line breaks are implemented using the codec's decoder
            method and are included in the list entries, unless
            keepends is false.

            sizehint, if given, is ignored since there is no efficient
            way to finding the true end-of-line.

        """
        data = self.read()
        return data.splitlines(keepends)

    def reset(self):

        """ Resets the codec buffers used for keeping state.

            Note that no stream repositioning should take place.
            This method is primarily intended to be able to recover
            from decoding errors.

        """
        self.bytebuffer = ""
        # Same empty value as in __init__: it has to stay a str so that
        # str->str codecs are not pushed through an implicit unicode
        # conversion after a reset
        self.charbuffer = ""
        self.linebuffer = None

    def seek(self, offset, whence=0):
        """ Set the input stream's current position.

            Resets the codec buffers used for keeping state.
        """
        self.stream.seek(offset, whence)
        self.reset()

    def next(self):

        """ Return the next decoded line from the input stream.

            Raises StopIteration when readline() returns an empty
            result.
        """
        line = self.readline()
        if line:
            return line
        raise StopIteration

    def __iter__(self):
        """ The reader is its own line iterator. """
        return self

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        """ Context manager entry; returns the reader itself. """
        return self

    def __exit__(self, type, value, tb):
        """ Context manager exit; closes the underlying stream. """
        self.stream.close()
635edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
636edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep###
637edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class StreamReaderWriter:

    """ StreamReaderWriter instances allow wrapping streams which
        work in both read and write modes.

        The design is such that one can use the factory functions
        returned by the codec.lookup() function to construct the
        instance.

        Reading is delegated to a reader object, writing to a writer
        object; both are built around the same stream.

    """
    # Optional attributes set by the file wrappers below
    encoding = 'unknown'

    def __init__(self, stream, Reader, Writer, errors='strict'):

        """ Creates a StreamReaderWriter instance.

            stream must be a Stream-like object.

            Reader, Writer must be factory functions or classes
            providing the StreamReader, StreamWriter interface resp.
            Each is called with (stream, errors).

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.

        """
        self.stream = stream
        decoding_side = Reader(stream, errors)
        self.reader = decoding_side
        encoding_side = Writer(stream, errors)
        self.writer = encoding_side
        self.errors = errors

    def read(self, size=-1):

        """ Returns the result of the reader's read(size)."""
        reader = self.reader
        return reader.read(size)

    def readline(self, size=None):

        """ Returns the result of the reader's readline(size)."""
        reader = self.reader
        return reader.readline(size)

    def readlines(self, sizehint=None):

        """ Returns the result of the reader's readlines(sizehint)."""
        reader = self.reader
        return reader.readlines(sizehint)

    def next(self):

        """ Return the next decoded line from the input stream."""
        reader = self.reader
        return reader.next()

    def __iter__(self):
        """ The wrapper is its own iterator; see next()."""
        return self

    def write(self, data):

        """ Returns the result of the writer's write(data)."""
        writer = self.writer
        return writer.write(data)

    def writelines(self, list):

        """ Returns the result of the writer's writelines(list)."""
        writer = self.writer
        return writer.writelines(list)

    def reset(self):

        """ Resets the reader first, then the writer."""
        self.reader.reset()
        self.writer.reset()

    def seek(self, offset, whence=0):

        """ Repositions the underlying stream.

            The reader is always reset afterwards; the writer only
            for an absolute seek back to offset 0.
        """
        self.stream.seek(offset, whence)
        self.reader.reset()
        back_to_start = (whence == 0 and offset == 0)
        if back_to_start:
            self.writer.reset()

    def __getattr__(self, name, getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        wrapped = self.stream
        return getattr(wrapped, name)

    # these are needed to make "with codecs.open(...)" work properly

    def __enter__(self):
        """ Context manager entry; returns the wrapper itself."""
        return self

    def __exit__(self, type, value, tb):
        """ Context manager exit; closes the underlying stream."""
        wrapped = self.stream
        wrapped.close()
722edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
723edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep###
724edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class StreamRecoder:

    """ StreamRecoder instances provide a frontend - backend
        view of encoding data.

        They use the complete set of APIs returned by the
        codecs.lookup() function to implement their task.

        Data written to the recoder is first decoded into an
        intermediate format (which is dependent on the given codec
        combination) and then written to the stream using an instance
        of the provided Writer class.

        In the other direction, data is read from the stream using a
        Reader instance and then returned to the caller in encoded
        form.

    """
    # Optional attributes set by the file wrappers below
    data_encoding = 'unknown'
    file_encoding = 'unknown'

    def __init__(self, stream, encode, decode, Reader, Writer,
                 errors='strict'):

        """ Creates a StreamRecoder instance which implements a two-way
            conversion: encode and decode work on the frontend (the
            data returned by .read() and the data passed to .write())
            while Reader and Writer work on the backend (reading and
            writing to the stream).

            You can use these objects to do transparent direct
            recodings from e.g. latin-1 to utf-8 and back.

            stream must be a file-like object.

            encode, decode must adhere to the Codec interface, Reader,
            Writer must be factory functions or classes providing the
            StreamReader, StreamWriter interface resp.

            encode and decode are needed for the frontend translation,
            Reader and Writer for the backend translation. Unicode is
            used as intermediate encoding.

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.

        """
        self.stream = stream
        self.encode = encode
        self.decode = decode
        self.reader = Reader(stream, errors)
        self.writer = Writer(stream, errors)
        self.errors = errors

    def read(self, size=-1):

        """ Read via the backend reader (size is passed through to it)
            and return the result encoded with the frontend encoder.
        """
        data = self.reader.read(size)
        data, bytesencoded = self.encode(data, self.errors)
        return data

    def readline(self, size=None):

        """ Read one line via the backend reader and return it encoded
            with the frontend encoder.

            size is only forwarded to the reader when it is not None.
        """
        if size is None:
            data = self.reader.readline()
        else:
            data = self.reader.readline(size)
        data, bytesencoded = self.encode(data, self.errors)
        return data

    def readlines(self, sizehint=None):

        """ Read everything the backend reader delivers, encode it with
            the frontend encoder and return it as a list of lines (line
            ends are kept).

            sizehint is accepted for interface compatibility only; it
            is ignored.
        """
        data = self.reader.read()
        data, bytesencoded = self.encode(data, self.errors)
        return data.splitlines(1)

    def next(self):

        """ Return the next line from the input stream, encoded with
            the frontend encoder."""
        data = self.reader.next()
        data, bytesencoded = self.encode(data, self.errors)
        return data

    def __iter__(self):
        return self

    def write(self, data):

        """ Decode data with the frontend decoder and write the result
            through the backend writer; returns what the writer's
            .write() returns.
        """
        data, bytesdecoded = self.decode(data, self.errors)
        return self.writer.write(data)

    def writelines(self, list):

        """ Concatenate the given strings, decode the result with the
            frontend decoder and write it through the backend writer.
        """
        data = ''.join(list)
        data, bytesdecoded = self.decode(data, self.errors)
        return self.writer.write(data)

    def reset(self):

        """ Reset the backend reader and writer."""
        self.reader.reset()
        self.writer.reset()

    def seek(self, offset, whence=0):

        """ Reposition the underlying stream and reset the codec state.

            Without this method a seek() would be delegated straight to
            the stream by __getattr__, leaving the reader untouched, so
            a following read could still return data tied to the old
            position.  The handling mirrors StreamReaderWriter.seek():
            the reader is reset on every seek, the writer only when
            seeking to the very start (offset 0, whence 0).

            Returns None.
        """
        self.stream.seek(offset, whence)
        self.reader.reset()
        if whence == 0 and offset == 0:
            self.writer.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    # these are needed to make "with EncodedFile(...)" style usage work

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # Closing the stream is all that happens here; nothing is
        # returned, so exceptions from the with block propagate.
        self.stream.close()
838edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
839edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### Shortcuts
840edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):

    """ Open an encoded file using the given mode and return
        a wrapped version providing transparent encoding/decoding.

        Note: The wrapped version will only accept the object format
        defined by the codecs, i.e. Unicode objects for most builtin
        codecs. Output is also codec dependent and will usually be
        Unicode as well.

        When an encoding is given, the file is always opened in binary
        mode, even if no binary mode was specified, and a 'U' flag in
        mode is dropped. This is done to avoid data loss due to
        encodings using 8-bit values. The default file mode is 'rb'
        meaning to open the file in binary read mode.

        encoding specifies the encoding which is to be used for the
        file. If it is None, the plain file object returned by the
        builtin open() is handed back unwrapped and mode is used as
        given.

        errors may be given to define the error handling. It defaults
        to 'strict' which causes ValueErrors to be raised in case an
        encoding error occurs.

        buffering has the same meaning as for the builtin open() API.
        It defaults to line buffered.

        The returned wrapped file object provides an extra attribute
        .encoding which allows querying the used encoding. This
        attribute is only available if an encoding was specified as
        parameter.

        If looking up the codec or building the wrapper fails, the
        already opened file is closed before the exception is
        propagated.

    """
    if encoding is not None:
        if 'U' in mode:
            # No automatic conversion of '\n' is done on reading and writing
            mode = mode.strip().replace('U', '')
            if mode[:1] not in set('rwa'):
                mode = 'r' + mode
        if 'b' not in mode:
            # Force opening of the file in binary mode
            mode = mode + 'b'
    file = __builtin__.open(filename, mode, buffering)
    if encoding is None:
        return file
    try:
        info = lookup(encoding)
        srw = StreamReaderWriter(file, info.streamreader,
                                 info.streamwriter, errors)
        # Add attributes to simplify introspection
        srw.encoding = encoding
        return srw
    except:
        # Deliberately catches everything: the file handle must not leak
        # when e.g. the encoding is unknown.  The exception is re-raised
        # unchanged.
        file.close()
        raise
889edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):

    """ Wrap file in a StreamRecoder that translates between two
        encodings transparently.

        Strings written to the wrapper are interpreted according to
        data_encoding and written to the original file using
        file_encoding. Strings read through the wrapper are read from
        the file using file_encoding and handed back to the caller
        using data_encoding. The intermediate format will usually be
        Unicode but depends on the specified codecs.

        file_encoding defaults to data_encoding when it is not given.

        errors selects the error handling. The default 'strict' causes
        ValueErrors to be raised in case an encoding error occurs.

        For introspection, the returned object carries the attributes
        .data_encoding and .file_encoding holding the encodings that
        are in effect.

    """
    if file_encoding is None:
        file_encoding = data_encoding
    # Frontend codec (caller side) is looked up first, then the
    # backend codec (file side).
    frontend = lookup(data_encoding)
    backend = lookup(file_encoding)
    recoder = StreamRecoder(file,
                            frontend.encode, frontend.decode,
                            backend.streamreader, backend.streamwriter,
                            errors)
    # Record the encodings on the instance for introspection.
    recoder.data_encoding = data_encoding
    recoder.file_encoding = file_encoding
    return recoder
925edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
926edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### Helpers for codec lookup
927edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def getencoder(encoding):

    """ Look up the codec for the given encoding and return
        its encoder function.

        Raises a LookupError in case the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.encode
937edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def getdecoder(encoding):

    """ Look up the codec for the given encoding and return
        its decoder function.

        Raises a LookupError in case the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.decode
947edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def getincrementalencoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalEncoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental encoder.

    """
    factory = lookup(encoding).incrementalencoder
    if factory is not None:
        return factory
    # The codec exists but has no incremental encoder.
    raise LookupError(encoding)
961edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def getincrementaldecoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalDecoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental decoder.

    """
    factory = lookup(encoding).incrementaldecoder
    if factory is not None:
        return factory
    # The codec exists but has no incremental decoder.
    raise LookupError(encoding)
975edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def getreader(encoding):

    """ Look up the codec for the given encoding and return
        its StreamReader class or factory function.

        Raises a LookupError in case the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.streamreader
985edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def getwriter(encoding):

    """ Look up the codec for the given encoding and return
        its StreamWriter class or factory function.

        Raises a LookupError in case the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.streamwriter
995edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def iterencode(iterator, encoding, errors='strict', **kwargs):
    """
    Encoding iterator.

    Feeds each string from the iterator to an IncrementalEncoder for
    the given encoding and yields every non-empty result.  After the
    input is exhausted the encoder is called once more with final set,
    and that output is yielded too if it is non-empty.

    errors and kwargs are passed through to the IncrementalEncoder
    constructor.
    """
    incremental = getincrementalencoder(encoding)(errors, **kwargs)
    for chunk in iterator:
        encoded = incremental.encode(chunk)
        if not encoded:
            # Nothing produced for this chunk; do not yield empties.
            continue
        yield encoded
    # Final call with an empty input to flush the encoder.
    tail = incremental.encode("", True)
    if tail:
        yield tail
1013edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def iterdecode(iterator, encoding, errors='strict', **kwargs):
    """
    Decoding iterator.

    Feeds each string from the iterator to an IncrementalDecoder for
    the given encoding and yields every non-empty result.  After the
    input is exhausted the decoder is called once more with final set,
    and that output is yielded too if it is non-empty.

    errors and kwargs are passed through to the IncrementalDecoder
    constructor.
    """
    incremental = getincrementaldecoder(encoding)(errors, **kwargs)
    for chunk in iterator:
        decoded = incremental.decode(chunk)
        if not decoded:
            # Nothing produced for this chunk; do not yield empties.
            continue
        yield decoded
    # Final call with an empty input to flush the decoder.
    tail = incremental.decode("", True)
    if tail:
        yield tail
1031edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
1032edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### Helpers for charmap-based codecs
1033edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def make_identity_dict(rng):

    """ make_identity_dict(rng) -> dict

        Build and return a dictionary in which every element of the
        rng sequence is both key and value.

    """
    return dict((item, item) for item in rng)
1046edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def make_encoding_map(decoding_map):

    """ Creates an encoding map from a decoding map.

        The result maps every value of decoding_map back to its key.
        A value that occurs more than once in decoding_map has no
        unique key, so it is mapped to None (undefined mapping),
        causing an exception when encountered by the charmap codec
        during translation.

        One example where this happens is cp875.py which decodes
        multiple characters to U+001A.

    """
    encoding_map = {}
    for source, target in decoding_map.items():
        if target in encoding_map:
            # Seen before: the reverse mapping is ambiguous.
            encoding_map[target] = None
        else:
            encoding_map[target] = source
    return encoding_map
1067edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
1068edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### error handlers
1069edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
try:
    # Fetch the five standard error handling callbacks in one pass;
    # the handler names line up with the target names on the left.
    (strict_errors,
     ignore_errors,
     replace_errors,
     xmlcharrefreplace_errors,
     backslashreplace_errors) = map(
        lookup_error,
        ("strict", "ignore", "replace",
         "xmlcharrefreplace", "backslashreplace"))
except LookupError:
    # In --disable-unicode builds, these error handlers are missing;
    # expose every name as None in that case.
    strict_errors = ignore_errors = replace_errors = None
    xmlcharrefreplace_errors = backslashreplace_errors = None
1083edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
1084edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# Tell modulefinder that using codecs probably needs the encodings
1085edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# package
1086edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep_false = 0
1087edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoepif _false:
1088edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep    import encodings
1089edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
1090edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep### Tests
1091edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
if __name__ == "__main__":

    # Wrap stdout: Latin-1 data written by the program is emitted
    # as UTF-8.
    sys.stdout = EncodedFile(sys.stdout,
                             data_encoding='latin-1',
                             file_encoding='utf-8')

    # Wrap stdin: Latin-1 input is handed to the program as UTF-8.
    sys.stdin = EncodedFile(sys.stdin,
                            data_encoding='utf-8',
                            file_encoding='latin-1')
1099