1import binascii
2import io
3import os
4import re
5import sys
6import tempfile
7import mimetypes
8try:
9    import simplejson as json
10except ImportError:
11    import json
12import warnings
13
14from webob.acceptparse import (
15    AcceptLanguage,
16    AcceptCharset,
17    MIMEAccept,
18    MIMENilAccept,
19    NoAccept,
20    accept_property,
21    )
22
23from webob.cachecontrol import (
24    CacheControl,
25    serialize_cache_control,
26    )
27
28from webob.compat import (
29    PY3,
30    bytes_,
31    integer_types,
32    native_,
33    parse_qsl_text,
34    reraise,
35    text_type,
36    url_encode,
37    url_quote,
38    url_unquote,
39    quote_plus,
40    urlparse,
41    cgi_FieldStorage
42    )
43
44from webob.cookies import RequestCookies
45
46from webob.descriptors import (
47    CHARSET_RE,
48    SCHEME_RE,
49    converter,
50    converter_date,
51    environ_getter,
52    environ_decoder,
53    parse_auth,
54    parse_int,
55    parse_int_safe,
56    parse_range,
57    serialize_auth,
58    serialize_if_range,
59    serialize_int,
60    serialize_range,
61    upath_property,
62    deprecated_property,
63    )
64
65from webob.etag import (
66    IfRange,
67    AnyETag,
68    NoETag,
69    etag_property,
70    )
71
72from webob.headers import EnvironHeaders
73
74from webob.multidict import (
75    NestedMultiDict,
76    MultiDict,
77    NoVars,
78    GetDict,
79    )
80
81from webob.util import warn_deprecation
82
# Names exported by a star-import of this module.
__all__ = ['BaseRequest', 'Request', 'LegacyRequest']
84
class _NoDefault:
    """Type of the ``NoDefault`` sentinel, meaning "no default was given"."""

    def __repr__(self):
        label = '(No Default)'
        return label


# Single shared sentinel instance; callers compare against it by identity.
NoDefault = _NoDefault()
89
# Characters passed as the "safe" set when quoting URL path components.
PATH_SAFE = '/:@&+$,'

# Maps an HTTP method name to whether a request using that method is
# expected to carry a body.  Methods absent from the table are "unknown".
http_method_probably_has_body = {
    'GET': False,
    'HEAD': False,
    'DELETE': False,
    'TRACE': False,
    'POST': True,
    'PUT': True,
    'PATCH': True,
}

# Encoding names for which ``encget`` returns the environ value untouched
# on Python 3 instead of round-tripping it through latin-1.
_LATIN_ENCODINGS = (
    'ascii',
    'latin-1',
    'latin',
    'latin_1',
    'l1',
    'latin1',
    'iso-8859-1',
    'iso8859_1',
    'iso_8859_1',
    'iso8859',
    '8859',
)
101
102class BaseRequest(object):
    ## The limit after which request bodies should be stored on disk
    ## if they are read in (below this limit, the request body is kept
    ## in memory).  NOTE(review): presumably a size in bytes; the code
    ## that consumes this limit is not in this part of the file.
    request_body_tempfile_limit = 10*1024

    # Cache for the ``charset`` property; None means "not computed yet".
    _charset = None
109
110    def __init__(self, environ, charset=None, unicode_errors=None,
111                 decode_param_names=None, **kw):
112
113        if type(environ) is not dict:
114            raise TypeError(
115                "WSGI environ must be a dict; you passed %r" % (environ,))
116        if unicode_errors is not None:
117            warnings.warn(
118                "You unicode_errors=%r to the Request constructor.  Passing a "
119                "``unicode_errors`` value to the Request is no longer "
120                "supported in WebOb 1.2+.  This value has been ignored " % (
121                    unicode_errors,),
122                DeprecationWarning
123                )
124        if decode_param_names is not None:
125            warnings.warn(
126                "You passed decode_param_names=%r to the Request constructor. "
127                "Passing a ``decode_param_names`` value to the Request "
128                "is no longer supported in WebOb 1.2+.  This value has "
129                "been ignored " % (decode_param_names,),
130                DeprecationWarning
131                )
132        if not _is_utf8(charset):
133            raise DeprecationWarning(
134                "You passed charset=%r to the Request constructor. As of "
135                "WebOb 1.2, if your application needs a non-UTF-8 request "
136                "charset, please construct the request without a charset or "
137                "with a charset of 'None',  then use ``req = "
138                "req.decode(charset)``" % charset
139
140            )
141        d = self.__dict__
142        d['environ'] = environ
143        if kw:
144            cls = self.__class__
145            if 'method' in kw:
146                # set method first, because .body setters
147                # depend on it for checks
148                self.method = kw.pop('method')
149            for name, value in kw.items():
150                if not hasattr(cls, name):
151                    raise TypeError(
152                        "Unexpected keyword: %s=%r" % (name, value))
153                setattr(self, name, value)
154
155    if PY3: # pragma: no cover
156        def encget(self, key, default=NoDefault, encattr=None):
157            val = self.environ.get(key, default)
158            if val is NoDefault:
159                raise KeyError(key)
160            if val is default:
161                return default
162            if not encattr:
163                return val
164            encoding = getattr(self, encattr)
165            if encoding in _LATIN_ENCODINGS: # shortcut
166                return val
167            return bytes_(val, 'latin-1').decode(encoding)
168    else:
169        def encget(self, key, default=NoDefault, encattr=None):
170            val = self.environ.get(key, default)
171            if val is NoDefault:
172                raise KeyError(key)
173            if val is default:
174                return default
175            if encattr is None:
176                return val
177            encoding = getattr(self, encattr)
178            return val.decode(encoding)
179
180    def encset(self, key, val, encattr=None):
181        if encattr:
182            encoding = getattr(self, encattr)
183        else:
184            encoding = 'ascii'
185        if PY3: # pragma: no cover
186            self.environ[key] = bytes_(val, encoding).decode('latin-1')
187        else:
188            self.environ[key] = bytes_(val, encoding)
189
190    @property
191    def charset(self):
192        if self._charset is None:
193            charset = detect_charset(self._content_type_raw)
194            if _is_utf8(charset):
195                charset = 'UTF-8'
196            self._charset = charset
197        return self._charset
198
199    @charset.setter
200    def charset(self, charset):
201        if _is_utf8(charset):
202            charset = 'UTF-8'
203        if charset != self.charset:
204            raise DeprecationWarning("Use req = req.decode(%r)" % charset)
205
    def decode(self, charset=None, errors='strict'):
        """
        Return a request transcoded from ``charset`` to UTF-8.

        ``charset`` defaults to the request's own charset.  When the
        effective charset is ``'UTF-8'`` the request itself is returned
        unchanged.  Otherwise a new request is built on a copy of the
        environ, with the query string transcoded and the charset in the
        Content-Type rewritten to UTF-8.  URL-encoded and multipart form
        bodies are transcoded as well; any other body is left as is.

        ``errors`` is handed to the ``Transcoder`` and, on Python 3, to
        ``cgi_FieldStorage``.
        """
        charset = charset or self.charset
        if charset == 'UTF-8':
            return self
        # cookies and path are always utf-8
        t = Transcoder(charset, errors)

        new_content_type = CHARSET_RE.sub('; charset="UTF-8"',
                                          self._content_type_raw)
        # Remember the parameter-less content type of the *original* request;
        # it decides below how the body has to be treated.
        content_type = self.content_type
        r = self.__class__(
            self.environ.copy(),
            query_string=t.transcode_query(self.query_string),
            content_type=new_content_type,
        )

        if content_type == 'application/x-www-form-urlencoded':
            # URL-encoded form body: transcode it like a query string.
            r.body = bytes_(t.transcode_query(native_(r.body)))
            return r
        elif content_type != 'multipart/form-data':
            # Not a form body: nothing further to transcode.
            return r

        # multipart/form-data: parse the original body with FieldStorage and
        # let the transcoder write a re-encoded copy.
        fs_environ = self.environ.copy()
        fs_environ.setdefault('CONTENT_LENGTH', '0')
        fs_environ['QUERY_STRING'] = ''
        if PY3: # pragma: no cover
            fs = cgi_FieldStorage(fp=self.body_file,
                                  environ=fs_environ,
                                  keep_blank_values=True,
                                  encoding=charset,
                                  errors=errors)
        else:
            fs = cgi_FieldStorage(fp=self.body_file,
                                  environ=fs_environ,
                                  keep_blank_values=True)


        fout = t.transcode_fs(fs, r._content_type_raw)

        # this order is important, because setting body_file
        # resets content_length
        r.body_file = fout
        r.content_length = fout.tell()
        fout.seek(0)
        return r
251
252
    # Stack level handed to warn_deprecation() by the body_file setter.
    # It lives in a class attribute so the warning depth comes out right
    # for both BaseRequest and Request (due to AdhocAttrMixin.__setattr__).
    _setattr_stacklevel = 2
256
257    def _body_file__get(self):
258        """
259            Input stream of the request (wsgi.input).
260            Setting this property resets the content_length and seekable flag
261            (unlike setting req.body_file_raw).
262        """
263        if not self.is_body_readable:
264            return io.BytesIO()
265        r = self.body_file_raw
266        clen = self.content_length
267        if not self.is_body_seekable and clen is not None:
268            # we need to wrap input in LimitedLengthFile
269            # but we have to cache the instance as well
270            # otherwise this would stop working
271            # (.remaining counter would reset between calls):
272            #   req.body_file.read(100)
273            #   req.body_file.read(100)
274            env = self.environ
275            wrapped, raw = env.get('webob._body_file', (0,0))
276            if raw is not r:
277                wrapped = LimitedLengthFile(r, clen)
278                wrapped = io.BufferedReader(wrapped)
279                env['webob._body_file'] = wrapped, r
280            r = wrapped
281        return r
282
    def _body_file__set(self, value):
        """
        Install ``value`` as the request's input stream.

        A ``bytes`` value triggers ``warn_deprecation`` (``req.body`` is the
        intended way to set bytes).  The content length is cleared, and the
        stream is marked as not seekable but readable.
        """
        if isinstance(value, bytes):
            warn_deprecation(
                "Please use req.body = b'bytes' or req.body_file = fileobj",
                '1.2',
                self._setattr_stacklevel
            )
        # Nothing is known about the new stream: drop the old length and
        # seekable flag, but flag the body as readable.
        self.content_length = None
        self.body_file_raw = value
        self.is_body_seekable = False
        self.is_body_readable = True
    def _body_file__del(self):
        """Deleting ``body_file`` sets the body to ``b''``."""
        self.body = b''
    body_file = property(_body_file__get,
                         _body_file__set,
                         _body_file__del,
                         doc=_body_file__get.__doc__)
    # Descriptor for the 'wsgi.input' environ key.  Unlike ``body_file``,
    # assigning here goes through none of the resets done in
    # _body_file__set (presumably a plain environ read/write; see
    # webob.descriptors.environ_getter).
    body_file_raw = environ_getter('wsgi.input')
301    @property
302    def body_file_seekable(self):
303        """
304            Get the body of the request (wsgi.input) as a seekable file-like
305            object. Middleware and routing applications should use this
306            attribute over .body_file.
307
308            If you access this value, CONTENT_LENGTH will also be updated.
309        """
310        if not self.is_body_seekable:
311            self.make_body_seekable()
312        return self.body_file_raw
313
    # The attributes below are descriptors built by helpers imported from
    # webob.descriptors (not shown in this file).
    # NOTE(review): presumably each one reads/writes the named WSGI environ
    # key and the second positional argument is the default used when the
    # key is absent -- confirm against webob.descriptors.
    url_encoding = environ_getter('webob.url_encoding', 'UTF-8')
    scheme = environ_getter('wsgi.url_scheme')
    method = environ_getter('REQUEST_METHOD', 'GET')
    http_version = environ_getter('SERVER_PROTOCOL')
    # CONTENT_LENGTH wrapped with int parsing/serialising helpers.
    # NOTE(review): '14.13' looks like an RFC 2616 section number -- confirm.
    content_length = converter(
        environ_getter('CONTENT_LENGTH', None, '14.13'),
        parse_int_safe, serialize_int, 'int')
    remote_user = environ_getter('REMOTE_USER', None)
    remote_addr = environ_getter('REMOTE_ADDR', None)
    query_string = environ_getter('QUERY_STRING', '')
    server_name = environ_getter('SERVER_NAME')
    # SERVER_PORT wrapped with int parsing/serialising helpers.
    server_port = converter(
        environ_getter('SERVER_PORT'),
        parse_int, serialize_int, 'int')

    # Path components; ``encattr`` names the attribute (url_encoding) that
    # holds the encoding used for them.
    script_name = environ_decoder('SCRIPT_NAME', '', encattr='url_encoding')
    path_info = environ_decoder('PATH_INFO', encattr='url_encoding')

    # bw compat: older names kept as aliases of the descriptors above
    uscript_name = script_name
    upath_info = path_info

    # Full Content-Type value including parameters; the ``content_type``
    # property exposes it with the parameters split off.
    _content_type_raw = environ_getter('CONTENT_TYPE', '')
337
338    def _content_type__get(self):
339        """Return the content type, but leaving off any parameters (like
340        charset, but also things like the type in ``application/atom+xml;
341        type=entry``)
342
343        If you set this property, you can include parameters, or if
344        you don't include any parameters in the value then existing
345        parameters will be preserved.
346        """
347        return self._content_type_raw.split(';', 1)[0]
348    def _content_type__set(self, value=None):
349        if value is not None:
350            value = str(value)
351            if ';' not in value:
352                content_type = self._content_type_raw
353                if ';' in content_type:
354                    value += ';' + content_type.split(';', 1)[1]
355        self._content_type_raw = value
356
357    content_type = property(_content_type__get,
358                            _content_type__set,
359                            _content_type__set,
360                            _content_type__get.__doc__)
361
362    _headers = None
363
364    def _headers__get(self):
365        """
366        All the request headers as a case-insensitive dictionary-like
367        object.
368        """
369        if self._headers is None:
370            self._headers = EnvironHeaders(self.environ)
371        return self._headers
372
373    def _headers__set(self, value):
374        self.headers.clear()
375        self.headers.update(value)
376
377    headers = property(_headers__get, _headers__set, doc=_headers__get.__doc__)
378
379    @property
380    def client_addr(self):
381        """
382        The effective client IP address as a string.  If the
383        ``HTTP_X_FORWARDED_FOR`` header exists in the WSGI environ, this
384        attribute returns the client IP address present in that header
385        (e.g. if the header value is ``192.168.1.1, 192.168.1.2``, the value
386        will be ``192.168.1.1``). If no ``HTTP_X_FORWARDED_FOR`` header is
387        present in the environ at all, this attribute will return the value
388        of the ``REMOTE_ADDR`` header.  If the ``REMOTE_ADDR`` header is
389        unset, this attribute will return the value ``None``.
390
391        .. warning::
392
393           It is possible for user agents to put someone else's IP or just
394           any string in ``HTTP_X_FORWARDED_FOR`` as it is a normal HTTP
395           header. Forward proxies can also provide incorrect values (private
396           IP addresses etc).  You cannot "blindly" trust the result of this
397           method to provide you with valid data unless you're certain that
398           ``HTTP_X_FORWARDED_FOR`` has the correct values.  The WSGI server
399           must be behind a trusted proxy for this to be true.
400        """
401        e = self.environ
402        xff = e.get('HTTP_X_FORWARDED_FOR')
403        if xff is not None:
404            addr = xff.split(',')[0].strip()
405        else:
406            addr = e.get('REMOTE_ADDR')
407        return addr
408
409    @property
410    def host_port(self):
411        """
412        The effective server port number as a string.  If the ``HTTP_HOST``
413        header exists in the WSGI environ, this attribute returns the port
414        number present in that header. If the ``HTTP_HOST`` header exists but
415        contains no explicit port number: if the WSGI url scheme is "https" ,
416        this attribute returns "443", if the WSGI url scheme is "http", this
417        attribute returns "80" .  If no ``HTTP_HOST`` header is present in
418        the environ at all, this attribute will return the value of the
419        ``SERVER_PORT`` header (which is guaranteed to be present).
420        """
421        e = self.environ
422        host = e.get('HTTP_HOST')
423        if host is not None:
424            if ':' in host:
425                host, port = host.split(':', 1)
426            else:
427                url_scheme = e['wsgi.url_scheme']
428                if url_scheme == 'https':
429                    port = '443'
430                else:
431                    port = '80'
432        else:
433            port = e['SERVER_PORT']
434        return port
435
436    @property
437    def host_url(self):
438        """
439        The URL through the host (no path)
440        """
441        e = self.environ
442        scheme = e.get('wsgi.url_scheme')
443        url = scheme + '://'
444        host = e.get('HTTP_HOST')
445        if host is not None:
446            if ':' in host:
447                host, port = host.split(':', 1)
448            else:
449                port = None
450        else:
451            host = e.get('SERVER_NAME')
452            port = e.get('SERVER_PORT')
453        if scheme == 'https':
454            if port == '443':
455                port = None
456        elif scheme == 'http':
457            if port == '80':
458                port = None
459        url += host
460        if port:
461            url += ':%s' % port
462        return url
463
464    @property
465    def application_url(self):
466        """
467        The URL including SCRIPT_NAME (no PATH_INFO or query string)
468        """
469        bscript_name = bytes_(self.script_name, self.url_encoding)
470        return self.host_url + url_quote(bscript_name, PATH_SAFE)
471
472    @property
473    def path_url(self):
474        """
475        The URL including SCRIPT_NAME and PATH_INFO, but not QUERY_STRING
476        """
477        bpath_info = bytes_(self.path_info, self.url_encoding)
478        return self.application_url + url_quote(bpath_info, PATH_SAFE)
479
480    @property
481    def path(self):
482        """
483        The path of the request, without host or query string
484        """
485        bscript = bytes_(self.script_name, self.url_encoding)
486        bpath = bytes_(self.path_info, self.url_encoding)
487        return url_quote(bscript, PATH_SAFE) + url_quote(bpath, PATH_SAFE)
488
489    @property
490    def path_qs(self):
491        """
492        The path of the request, without host but with query string
493        """
494        path = self.path
495        qs = self.environ.get('QUERY_STRING')
496        if qs:
497            path += '?' + qs
498        return path
499
500    @property
501    def url(self):
502        """
503        The full request URL, including QUERY_STRING
504        """
505        url = self.path_url
506        qs = self.environ.get('QUERY_STRING')
507        if qs:
508            url += '?' + qs
509        return url
510
511    def relative_url(self, other_url, to_application=False):
512        """
513        Resolve other_url relative to the request URL.
514
515        If ``to_application`` is True, then resolve it relative to the
516        URL with only SCRIPT_NAME
517        """
518        if to_application:
519            url = self.application_url
520            if not url.endswith('/'):
521                url += '/'
522        else:
523            url = self.path_url
524        return urlparse.urljoin(url, other_url)
525
526    def path_info_pop(self, pattern=None):
527        """
528        'Pops' off the next segment of PATH_INFO, pushing it onto
529        SCRIPT_NAME, and returning the popped segment.  Returns None if
530        there is nothing left on PATH_INFO.
531
532        Does not return ``''`` when there's an empty segment (like
533        ``/path//path``); these segments are just ignored.
534
535        Optional ``pattern`` argument is a regexp to match the return value
536        before returning. If there is no match, no changes are made to the
537        request and None is returned.
538        """
539        path = self.path_info
540        if not path:
541            return None
542        slashes = ''
543        while path.startswith('/'):
544            slashes += '/'
545            path = path[1:]
546        idx = path.find('/')
547        if idx == -1:
548            idx = len(path)
549        r = path[:idx]
550        if pattern is None or re.match(pattern, r):
551            self.script_name += slashes + r
552            self.path_info = path[idx:]
553            return r
554
555    def path_info_peek(self):
556        """
557        Returns the next segment on PATH_INFO, or None if there is no
558        next segment.  Doesn't modify the environment.
559        """
560        path = self.path_info
561        if not path:
562            return None
563        path = path.lstrip('/')
564        return path.split('/', 1)[0]
565
566    def _urlvars__get(self):
567        """
568        Return any *named* variables matched in the URL.
569
570        Takes values from ``environ['wsgiorg.routing_args']``.
571        Systems like ``routes`` set this value.
572        """
573        if 'paste.urlvars' in self.environ:
574            return self.environ['paste.urlvars']
575        elif 'wsgiorg.routing_args' in self.environ:
576            return self.environ['wsgiorg.routing_args'][1]
577        else:
578            result = {}
579            self.environ['wsgiorg.routing_args'] = ((), result)
580            return result
581
582    def _urlvars__set(self, value):
583        environ = self.environ
584        if 'wsgiorg.routing_args' in environ:
585            environ['wsgiorg.routing_args'] = (
586                    environ['wsgiorg.routing_args'][0], value)
587            if 'paste.urlvars' in environ:
588                del environ['paste.urlvars']
589        elif 'paste.urlvars' in environ:
590            environ['paste.urlvars'] = value
591        else:
592            environ['wsgiorg.routing_args'] = ((), value)
593
594    def _urlvars__del(self):
595        if 'paste.urlvars' in self.environ:
596            del self.environ['paste.urlvars']
597        if 'wsgiorg.routing_args' in self.environ:
598            if not self.environ['wsgiorg.routing_args'][0]:
599                del self.environ['wsgiorg.routing_args']
600            else:
601                self.environ['wsgiorg.routing_args'] = (
602                        self.environ['wsgiorg.routing_args'][0], {})
603
604    urlvars = property(_urlvars__get,
605                       _urlvars__set,
606                       _urlvars__del,
607                       doc=_urlvars__get.__doc__)
608
609    def _urlargs__get(self):
610        """
611        Return any *positional* variables matched in the URL.
612
613        Takes values from ``environ['wsgiorg.routing_args']``.
614        Systems like ``routes`` set this value.
615        """
616        if 'wsgiorg.routing_args' in self.environ:
617            return self.environ['wsgiorg.routing_args'][0]
618        else:
619            # Since you can't update this value in-place, we don't need
620            # to set the key in the environment
621            return ()
622
623    def _urlargs__set(self, value):
624        environ = self.environ
625        if 'paste.urlvars' in environ:
626            # Some overlap between this and wsgiorg.routing_args; we need
627            # wsgiorg.routing_args to make this work
628            routing_args = (value, environ.pop('paste.urlvars'))
629        elif 'wsgiorg.routing_args' in environ:
630            routing_args = (value, environ['wsgiorg.routing_args'][1])
631        else:
632            routing_args = (value, {})
633        environ['wsgiorg.routing_args'] = routing_args
634
635    def _urlargs__del(self):
636        if 'wsgiorg.routing_args' in self.environ:
637            if not self.environ['wsgiorg.routing_args'][1]:
638                del self.environ['wsgiorg.routing_args']
639            else:
640                self.environ['wsgiorg.routing_args'] = (
641                        (), self.environ['wsgiorg.routing_args'][1])
642
643    urlargs = property(_urlargs__get,
644                       _urlargs__set,
645                       _urlargs__del,
646                       _urlargs__get.__doc__)
647
648    @property
649    def is_xhr(self):
650        """Is X-Requested-With header present and equal to ``XMLHttpRequest``?
651
652        Note: this isn't set by every XMLHttpRequest request, it is
653        only set if you are using a Javascript library that sets it
654        (or you set the header yourself manually).  Currently
655        Prototype and jQuery are known to set this header."""
656        return self.environ.get('HTTP_X_REQUESTED_WITH', '') == 'XMLHttpRequest'
657
658    def _host__get(self):
659        """Host name provided in HTTP_HOST, with fall-back to SERVER_NAME"""
660        if 'HTTP_HOST' in self.environ:
661            return self.environ['HTTP_HOST']
662        else:
663            return '%(SERVER_NAME)s:%(SERVER_PORT)s' % self.environ
664    def _host__set(self, value):
665        self.environ['HTTP_HOST'] = value
666    def _host__del(self):
667        if 'HTTP_HOST' in self.environ:
668            del self.environ['HTTP_HOST']
669    host = property(_host__get, _host__set, _host__del, doc=_host__get.__doc__)
670
671    @property
672    def domain(self):
673        """ Returns the domain portion of the host value.  Equivalent to:
674
675        .. code-block:: python
676
677           domain = request.host
678           if ':' in domain:
679               domain = domain.split(':', 1)[0]
680
681        This will be equivalent to the domain portion of the ``HTTP_HOST``
682        value in the environment if it exists, or the ``SERVER_NAME`` value in
683        the environment if it doesn't.  For example, if the environment
684        contains an ``HTTP_HOST`` value of ``foo.example.com:8000``,
685        ``request.domain`` will return ``foo.example.com``.
686
687        Note that this value cannot be *set* on the request.  To set the host
688        value use :meth:`webob.request.Request.host` instead.
689        """
690        domain = self.host
691        if ':' in domain:
692             domain = domain.split(':', 1)[0]
693        return domain
694
    def _body__get(self):
        """
        Return the content of the request body.

        Returns ``b''`` when the body is not readable.  Otherwise the input
        is made seekable, ``content_length`` bytes are read from it, and the
        raw stream is rewound to the start.
        """
        if not self.is_body_readable:
            return b''
        self.make_body_seekable() # we need this to have content_length
        r = self.body_file.read(self.content_length)
        # rewind the underlying stream so the body can be read again
        self.body_file_raw.seek(0)
        return r
    def _body__set(self, value):
        """
        Replace the request body with ``value`` (``bytes``; ``None`` is
        treated as ``b''``).  Raises ``TypeError`` for any other type.
        """
        if value is None:
            value = b''
        if not isinstance(value, bytes):
            raise TypeError("You can only set Request.body to bytes (not %r)"
                                % type(value))
        # For methods listed as body-less in http_method_probably_has_body,
        # an empty body clears CONTENT_LENGTH instead of setting it to 0.
        # Methods missing from the table are treated as having a body.
        if not http_method_probably_has_body.get(self.method, True):
            if not value:
                self.content_length = None
                self.body_file_raw = io.BytesIO()
                return
        self.content_length = len(value)
        self.body_file_raw = io.BytesIO(value)
        self.is_body_seekable = True
    def _body__del(self):
        """Deleting ``body`` sets it to ``b''``."""
        self.body = b''
    body = property(_body__get, _body__set, _body__del, doc=_body__get.__doc__)
722
723    def _json_body__get(self):
724        """Access the body of the request as JSON"""
725        return json.loads(self.body.decode(self.charset))
726
727    def _json_body__set(self, value):
728        self.body = json.dumps(value, separators=(',', ':')).encode(self.charset)
729
730    def _json_body__del(self):
731        del self.body
732
733    json = json_body = property(_json_body__get, _json_body__set, _json_body__del)
734
735    def _text__get(self):
736        """
737        Get/set the text value of the body
738        """
739        if not self.charset:
740            raise AttributeError(
741                "You cannot access Request.text unless charset is set")
742        body = self.body
743        return body.decode(self.charset)
744
745    def _text__set(self, value):
746        if not self.charset:
747            raise AttributeError(
748                "You cannot access Response.text unless charset is set")
749        if not isinstance(value, text_type):
750            raise TypeError(
751                "You can only set Request.text to a unicode string "
752                "(not %s)" % type(value))
753        self.body = value.encode(self.charset)
754
755    def _text__del(self):
756        del self.body
757
758    text = property(_text__get, _text__set, _text__del, doc=_text__get.__doc__)
759
760
    @property
    def POST(self):
        """
        Return a MultiDict containing all the variables from a form
        request. Returns an empty dict-like object for non-form requests.

        Form requests are typically POST requests, however PUT & PATCH requests
        with an appropriate Content-Type are also supported.
        """
        env = self.environ
        if self.method not in ('POST', 'PUT', 'PATCH'):
            return NoVars('Not a form request')
        # Reuse the cached parse result, but only while it was built from
        # the very same raw input stream object.
        if 'webob._parsed_post_vars' in env:
            vars, body_file = env['webob._parsed_post_vars']
            if body_file is self.body_file_raw:
                return vars
        content_type = self.content_type
        # An empty Content-Type is accepted as a form for POST and PATCH,
        # but not for PUT.
        if ((self.method == 'PUT' and not content_type)
            or content_type not in
                ('',
                 'application/x-www-form-urlencoded',
                 'multipart/form-data')
                 ):
            # Not an HTML form submission
            return NoVars('Not an HTML form submission (Content-Type: %s)'
                          % content_type)
        self._check_charset()

        # Make the body seekable and rewind it before it is parsed.
        self.make_body_seekable()
        self.body_file_raw.seek(0)

        fs_environ = env.copy()
        # FieldStorage assumes a missing CONTENT_LENGTH, but a
        # default of 0 is better:
        fs_environ.setdefault('CONTENT_LENGTH', '0')
        # Blank out the query string in the copy handed to FieldStorage
        # (presumably so that only body fields are parsed here; query
        # variables are served by ``GET``).
        fs_environ['QUERY_STRING'] = ''
        if PY3: # pragma: no cover
            fs = cgi_FieldStorage(
                fp=self.body_file,
                environ=fs_environ,
                keep_blank_values=True,
                encoding='utf8')
            vars = MultiDict.from_fieldstorage(fs)
        else:
            fs = cgi_FieldStorage(
                fp=self.body_file,
                environ=fs_environ,
                keep_blank_values=True)
            vars = MultiDict.from_fieldstorage(fs)

        # Cache the result together with the stream it was parsed from.
        env['webob._parsed_post_vars'] = (vars, self.body_file_raw)
        return vars
813
814    @property
815    def GET(self):
816        """
817        Return a MultiDict containing all the variables from the
818        QUERY_STRING.
819        """
820        env = self.environ
821        source = env.get('QUERY_STRING', '')
822        if 'webob._parsed_query_vars' in env:
823            vars, qs = env['webob._parsed_query_vars']
824            if qs == source:
825                return vars
826
827        data = []
828        if source:
829            # this is disabled because we want to access req.GET
830            # for text/plain; charset=ascii uploads for example
831            #self._check_charset()
832            data = parse_qsl_text(source)
833            #d = lambda b: b.decode('utf8')
834            #data = [(d(k), d(v)) for k,v in data]
835        vars = GetDict(data, env)
836        env['webob._parsed_query_vars'] = (vars, source)
837        return vars
838
839    def _check_charset(self):
840        if self.charset != 'UTF-8':
841            raise DeprecationWarning(
842                "Requests are expected to be submitted in UTF-8, not %s. "
843                "You can fix this by doing req = req.decode('%s')" % (
844                    self.charset, self.charset)
845            )
846
847    @property
848    def params(self):
849        """
850        A dictionary-like object containing both the parameters from
851        the query string and request body.
852        """
853        params = NestedMultiDict(self.GET, self.POST)
854        return params
855
856
857    @property
858    def cookies(self):
859        """
860        Return a dictionary of cookies as found in the request.
861        """
862        return RequestCookies(self.environ)
863
864    @cookies.setter
865    def cookies(self, val):
866        self.environ.pop('HTTP_COOKIE', None)
867        r = RequestCookies(self.environ)
868        r.update(val)
869
870    def copy(self):
871        """
872        Copy the request and environment object.
873
874        This only does a shallow copy, except of wsgi.input
875        """
876        self.make_body_seekable()
877        env = self.environ.copy()
878        new_req = self.__class__(env)
879        new_req.copy_body()
880        return new_req
881
882    def copy_get(self):
883        """
884        Copies the request and environment object, but turning this request
885        into a GET along the way.  If this was a POST request (or any other
886        verb) then it becomes GET, and the request body is thrown away.
887        """
888        env = self.environ.copy()
889        return self.__class__(env, method='GET', content_type=None,
890                              body=b'')
891
    # ``webob.is_body_seekable`` is an environ flag marking input streams
    # that are known to be seekable, so seekability can be decided without
    # probing the stream's .seek() method.  The ``False`` argument is
    # presumably the value used when the key is absent -- confirm against
    # environ_getter.
    is_body_seekable = environ_getter('webob.is_body_seekable', False)

    # Kept for reference: is_body_readable is not a plain environ getter;
    # it is a computed property built from _is_body_readable__get/__set.
    #is_body_readable = environ_getter('webob.is_body_readable', False)
897
898    def _is_body_readable__get(self):
899        """
900            webob.is_body_readable is a flag that tells us
901            that we can read the input stream even though
902            CONTENT_LENGTH is missing. This allows FakeCGIBody
903            to work and can be used by servers to support
904            chunked encoding in requests.
905            For background see https://bitbucket.org/ianb/webob/issue/6
906        """
907        if http_method_probably_has_body.get(self.method):
908            # known HTTP method with body
909            return True
910        elif self.content_length is not None:
911            # unknown HTTP method, but the Content-Length
912            # header is present
913            return True
914        else:
915            # last resort -- rely on the special flag
916            return self.environ.get('webob.is_body_readable', False)
917
918    def _is_body_readable__set(self, flag):
919        self.environ['webob.is_body_readable'] = bool(flag)
920
921    is_body_readable = property(_is_body_readable__get, _is_body_readable__set,
922        doc=_is_body_readable__get.__doc__
923    )
924
925
926
927    def make_body_seekable(self):
928        """
929        This forces ``environ['wsgi.input']`` to be seekable.
930        That means that, the content is copied into a BytesIO or temporary
931        file and flagged as seekable, so that it will not be unnecessarily
932        copied again.
933
934        After calling this method the .body_file is always seeked to the
935        start of file and .content_length is not None.
936
937        The choice to copy to BytesIO is made from
938        ``self.request_body_tempfile_limit``
939        """
940        if self.is_body_seekable:
941            self.body_file_raw.seek(0)
942        else:
943            self.copy_body()
944
945
946    def copy_body(self):
947        """
948        Copies the body, in cases where it might be shared with
949        another request object and that is not desired.
950
951        This copies the body in-place, either into a BytesIO object
952        or a temporary file.
953        """
954        if not self.is_body_readable:
955            # there's no body to copy
956            self.body = b''
957        elif self.content_length is None:
958            # chunked body or FakeCGIBody
959            self.body = self.body_file_raw.read()
960            self._copy_body_tempfile()
961        else:
962            # try to read body into tempfile
963            did_copy = self._copy_body_tempfile()
964            if not did_copy:
965                # it wasn't necessary, so just read it into memory
966                self.body = self.body_file.read(self.content_length)
967
968    def _copy_body_tempfile(self):
969        """
970            Copy wsgi.input to tempfile if necessary. Returns True if it did.
971        """
972        tempfile_limit = self.request_body_tempfile_limit
973        todo = self.content_length
974        assert isinstance(todo, integer_types), todo
975        if not tempfile_limit or todo <= tempfile_limit:
976            return False
977        fileobj = self.make_tempfile()
978        input = self.body_file
979        while todo > 0:
980            data = input.read(min(todo, 65536))
981            if not data:
982                # Normally this should not happen, because LimitedLengthFile
983                # should have raised an exception by now.
984                # It can happen if the is_body_seekable flag is incorrect.
985                raise DisconnectionError(
986                    "Client disconnected (%s more bytes were expected)"
987                    % todo
988                )
989            fileobj.write(data)
990            todo -= len(data)
991        fileobj.seek(0)
992        self.body_file_raw = fileobj
993        self.is_body_seekable = True
994        return True
995
996    def make_tempfile(self):
997        """
998            Create a tempfile to store big request body.
999            This API is not stable yet. A 'size' argument might be added.
1000        """
1001        return tempfile.TemporaryFile()
1002
1003
1004    def remove_conditional_headers(self,
1005                                   remove_encoding=True,
1006                                   remove_range=True,
1007                                   remove_match=True,
1008                                   remove_modified=True):
1009        """
1010        Remove headers that make the request conditional.
1011
1012        These headers can cause the response to be 304 Not Modified,
1013        which in some cases you may not want to be possible.
1014
1015        This does not remove headers like If-Match, which are used for
1016        conflict detection.
1017        """
1018        check_keys = []
1019        if remove_range:
1020            check_keys += ['HTTP_IF_RANGE', 'HTTP_RANGE']
1021        if remove_match:
1022            check_keys.append('HTTP_IF_NONE_MATCH')
1023        if remove_modified:
1024            check_keys.append('HTTP_IF_MODIFIED_SINCE')
1025        if remove_encoding:
1026            check_keys.append('HTTP_ACCEPT_ENCODING')
1027
1028        for key in check_keys:
1029            if key in self.environ:
1030                del self.environ[key]
1031
1032
    # Content-negotiation headers, each exposed through accept_property.
    # The second argument ('14.x') looks like the RFC 2616 section that
    # defines the header -- presumably used for the generated docs; confirm
    # against accept_property.
    accept = accept_property('Accept', '14.1', MIMEAccept, MIMENilAccept)
    accept_charset = accept_property('Accept-Charset', '14.2', AcceptCharset)
    accept_encoding = accept_property('Accept-Encoding', '14.3',
                                      NilClass=NoAccept)
    accept_language = accept_property('Accept-Language', '14.4', AcceptLanguage)

    # Authorization header: the raw HTTP_AUTHORIZATION environ value is
    # converted with parse_auth on read and serialize_auth on write.
    authorization = converter(
        environ_getter('HTTP_AUTHORIZATION', None, '14.8'),
        parse_auth, serialize_auth,
    )
1043
1044
1045    def _cache_control__get(self):
1046        """
1047        Get/set/modify the Cache-Control header (`HTTP spec section 14.9
1048        <http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9>`_)
1049        """
1050        env = self.environ
1051        value = env.get('HTTP_CACHE_CONTROL', '')
1052        cache_header, cache_obj = env.get('webob._cache_control', (None, None))
1053        if cache_obj is not None and cache_header == value:
1054            return cache_obj
1055        cache_obj = CacheControl.parse(value,
1056                                       updates_to=self._update_cache_control,
1057                                       type='request')
1058        env['webob._cache_control'] = (value, cache_obj)
1059        return cache_obj
1060
1061    def _cache_control__set(self, value):
1062        env = self.environ
1063        value = value or ''
1064        if isinstance(value, dict):
1065            value = CacheControl(value, type='request')
1066        if isinstance(value, CacheControl):
1067            str_value = str(value)
1068            env['HTTP_CACHE_CONTROL'] = str_value
1069            env['webob._cache_control'] = (str_value, value)
1070        else:
1071            env['HTTP_CACHE_CONTROL'] = str(value)
1072            env['webob._cache_control'] = (None, None)
1073
1074    def _cache_control__del(self):
1075        env = self.environ
1076        if 'HTTP_CACHE_CONTROL' in env:
1077            del env['HTTP_CACHE_CONTROL']
1078        if 'webob._cache_control' in env:
1079            del env['webob._cache_control']
1080
1081    def _update_cache_control(self, prop_dict):
1082        self.environ['HTTP_CACHE_CONTROL'] = serialize_cache_control(prop_dict)
1083
1084    cache_control = property(_cache_control__get,
1085                             _cache_control__set,
1086                             _cache_control__del,
1087                             doc=_cache_control__get.__doc__)
1088
1089
1090    if_match = etag_property('HTTP_IF_MATCH', AnyETag, '14.24')
1091    if_none_match = etag_property('HTTP_IF_NONE_MATCH', NoETag, '14.26',
1092                                  strong=False)
1093
1094    date = converter_date(environ_getter('HTTP_DATE', None, '14.8'))
1095    if_modified_since = converter_date(
1096                    environ_getter('HTTP_IF_MODIFIED_SINCE', None, '14.25'))
1097    if_unmodified_since = converter_date(
1098                    environ_getter('HTTP_IF_UNMODIFIED_SINCE', None, '14.28'))
1099    if_range = converter(
1100        environ_getter('HTTP_IF_RANGE', None, '14.27'),
1101        IfRange.parse, serialize_if_range, 'IfRange object')
1102
1103
1104    max_forwards = converter(
1105        environ_getter('HTTP_MAX_FORWARDS', None, '14.31'),
1106        parse_int, serialize_int, 'int')
1107
1108    pragma = environ_getter('HTTP_PRAGMA', None, '14.32')
1109
1110    range = converter(
1111        environ_getter('HTTP_RANGE', None, '14.35'),
1112        parse_range, serialize_range, 'Range object')
1113
1114    referer = environ_getter('HTTP_REFERER', None, '14.36')
1115    referrer = referer
1116
1117    user_agent = environ_getter('HTTP_USER_AGENT', None, '14.43')
1118
1119    def __repr__(self):
1120        try:
1121            name = '%s %s' % (self.method, self.url)
1122        except KeyError:
1123            name = '(invalid WSGI environ)'
1124        msg = '<%s at 0x%x %s>' % (
1125            self.__class__.__name__,
1126            abs(id(self)), name)
1127        return msg
1128
1129    def as_bytes(self, skip_body=False):
1130        """
1131            Return HTTP bytes representing this request.
1132            If skip_body is True, exclude the body.
1133            If skip_body is an integer larger than one, skip body
1134            only if its length is bigger than that number.
1135        """
1136        url = self.url
1137        host = self.host_url
1138        assert url.startswith(host)
1139        url = url[len(host):]
1140        parts = [bytes_('%s %s %s' % (self.method, url, self.http_version))]
1141        #self.headers.setdefault('Host', self.host)
1142
1143        # acquire body before we handle headers so that
1144        # content-length will be set
1145        body = None
1146        if http_method_probably_has_body.get(self.method):
1147            if skip_body > 1:
1148                if len(self.body) > skip_body:
1149                    body = bytes_('<body skipped (len=%s)>' % len(self.body))
1150                else:
1151                    skip_body = False
1152            if not skip_body:
1153                body = self.body
1154
1155        for k, v in sorted(self.headers.items()):
1156            header = bytes_('%s: %s'  % (k, v))
1157            parts.append(header)
1158
1159        if body:
1160            parts.extend([b'', body])
1161        # HTTP clearly specifies CRLF
1162        return b'\r\n'.join(parts)
1163
1164    def as_string(self, skip_body=False):
1165        # TODO: Remove in 1.4
1166        warn_deprecation(
1167            "Please use req.as_bytes",
1168            '1.3',
1169            self._setattr_stacklevel
1170            )
1171
1172    def as_text(self):
1173        bytes = self.as_bytes()
1174        return bytes.decode(self.charset)
1175
1176    __str__ = as_text
1177
1178    @classmethod
1179    def from_bytes(cls, b):
1180        """
1181            Create a request from HTTP bytes data. If the bytes contain
1182            extra data after the request, raise a ValueError.
1183        """
1184        f = io.BytesIO(b)
1185        r = cls.from_file(f)
1186        if f.tell() != len(b):
1187            raise ValueError("The string contains more data than expected")
1188        return r
1189
1190    @classmethod
1191    def from_string(cls, b):
1192        # TODO: Remove in 1.4
1193        warn_deprecation(
1194            "Please use req.from_bytes",
1195            '1.3',
1196            cls._setattr_stacklevel
1197            )
1198
1199    @classmethod
1200    def from_text(cls, s):
1201        b = bytes_(s, 'utf-8')
1202        return cls.from_bytes(b)
1203
1204    @classmethod
1205    def from_file(cls, fp):
1206        """Read a request from a file-like object (it must implement
1207        ``.read(size)`` and ``.readline()``).
1208
1209        It will read up to the end of the request, not the end of the
1210        file (unless the request is a POST or PUT and has no
1211        Content-Length, in that case, the entire file is read).
1212
1213        This reads the request as represented by ``str(req)``; it may
1214        not read every valid HTTP request properly.
1215        """
1216        start_line = fp.readline()
1217        is_text = isinstance(start_line, text_type)
1218        if is_text:
1219            crlf = '\r\n'
1220            colon = ':'
1221        else:
1222            crlf = b'\r\n'
1223            colon = b':'
1224        try:
1225            header = start_line.rstrip(crlf)
1226            method, resource, http_version = header.split(None, 2)
1227            method = native_(method, 'utf-8')
1228            resource = native_(resource, 'utf-8')
1229            http_version = native_(http_version, 'utf-8')
1230        except ValueError:
1231            raise ValueError('Bad HTTP request line: %r' % start_line)
1232        r = cls(environ_from_url(resource),
1233                http_version=http_version,
1234                method=method.upper()
1235                )
1236        del r.environ['HTTP_HOST']
1237        while 1:
1238            line = fp.readline()
1239            if not line.strip():
1240                # end of headers
1241                break
1242            hname, hval = line.split(colon, 1)
1243            hname = native_(hname, 'utf-8')
1244            hval = native_(hval, 'utf-8').strip()
1245            if hname in r.headers:
1246                hval = r.headers[hname] + ', ' + hval
1247            r.headers[hname] = hval
1248        if http_method_probably_has_body.get(r.method):
1249            clen = r.content_length
1250            if clen is None:
1251                body = fp.read()
1252            else:
1253                body = fp.read(clen)
1254            if is_text:
1255                body = bytes_(body, 'utf-8')
1256            r.body = body
1257        return r
1258
1259    def call_application(self, application, catch_exc_info=False):
1260        """
1261        Call the given WSGI application, returning ``(status_string,
1262        headerlist, app_iter)``
1263
1264        Be sure to call ``app_iter.close()`` if it's there.
1265
1266        If catch_exc_info is true, then returns ``(status_string,
1267        headerlist, app_iter, exc_info)``, where the fourth item may
1268        be None, but won't be if there was an exception.  If you don't
1269        do this and there was an exception, the exception will be
1270        raised directly.
1271        """
1272        if self.is_body_seekable:
1273            self.body_file_raw.seek(0)
1274        captured = []
1275        output = []
1276        def start_response(status, headers, exc_info=None):
1277            if exc_info is not None and not catch_exc_info:
1278                reraise(exc_info)
1279            captured[:] = [status, headers, exc_info]
1280            return output.append
1281        app_iter = application(self.environ, start_response)
1282        if output or not captured:
1283            try:
1284                output.extend(app_iter)
1285            finally:
1286                if hasattr(app_iter, 'close'):
1287                    app_iter.close()
1288            app_iter = output
1289        if catch_exc_info:
1290            return (captured[0], captured[1], app_iter, captured[2])
1291        else:
1292            return (captured[0], captured[1], app_iter)
1293
1294    # Will be filled in later:
1295    ResponseClass = None
1296
1297    def send(self, application=None, catch_exc_info=False):
1298        """
1299        Like ``.call_application(application)``, except returns a
1300        response object with ``.status``, ``.headers``, and ``.body``
1301        attributes.
1302
1303        This will use ``self.ResponseClass`` to figure out the class
1304        of the response object to return.
1305
1306        If ``application`` is not given, this will send the request to
1307        ``self.make_default_send_app()``
1308        """
1309        if application is None:
1310            application = self.make_default_send_app()
1311        if catch_exc_info:
1312            status, headers, app_iter, exc_info = self.call_application(
1313                application, catch_exc_info=True)
1314            del exc_info
1315        else:
1316            status, headers, app_iter = self.call_application(
1317                application, catch_exc_info=False)
1318        return self.ResponseClass(
1319            status=status, headerlist=list(headers), app_iter=app_iter)
1320
1321    get_response = send
1322
1323    def make_default_send_app(self):
1324        global _client
1325        try:
1326            client = _client
1327        except NameError:
1328            from webob import client
1329            _client = client
1330        return client.send_request_app
1331
1332    @classmethod
1333    def blank(cls, path, environ=None, base_url=None,
1334              headers=None, POST=None, **kw):
1335        """
1336        Create a blank request environ (and Request wrapper) with the
1337        given path (path should be urlencoded), and any keys from
1338        environ.
1339
1340        The path will become path_info, with any query string split
1341        off and used.
1342
1343        All necessary keys will be added to the environ, but the
1344        values you pass in will take precedence.  If you pass in
1345        base_url then wsgi.url_scheme, HTTP_HOST, and SCRIPT_NAME will
1346        be filled in from that value.
1347
1348        Any extra keyword will be passed to ``__init__``.
1349        """
1350        env = environ_from_url(path)
1351        if base_url:
1352            scheme, netloc, path, query, fragment = urlparse.urlsplit(base_url)
1353            if query or fragment:
1354                raise ValueError(
1355                    "base_url (%r) cannot have a query or fragment"
1356                    % base_url)
1357            if scheme:
1358                env['wsgi.url_scheme'] = scheme
1359            if netloc:
1360                if ':' not in netloc:
1361                    if scheme == 'http':
1362                        netloc += ':80'
1363                    elif scheme == 'https':
1364                        netloc += ':443'
1365                    else:
1366                        raise ValueError(
1367                            "Unknown scheme: %r" % scheme)
1368                host, port = netloc.split(':', 1)
1369                env['SERVER_PORT'] = port
1370                env['SERVER_NAME'] = host
1371                env['HTTP_HOST'] = netloc
1372            if path:
1373                env['SCRIPT_NAME'] = url_unquote(path)
1374        if environ:
1375            env.update(environ)
1376        content_type = kw.get('content_type', env.get('CONTENT_TYPE'))
1377        if headers and 'Content-Type' in headers:
1378            content_type = headers['Content-Type']
1379        if content_type is not None:
1380            kw['content_type'] = content_type
1381        environ_add_POST(env, POST, content_type=content_type)
1382        obj = cls(env, **kw)
1383        if headers is not None:
1384            obj.headers.update(headers)
1385        return obj
1386
class LegacyRequest(BaseRequest):
    """
    Request variant that exposes ``uscript_name`` / ``upath_info`` through
    ``upath_property`` and returns environ values from ``encget`` as-is.
    """
    uscript_name = upath_property('SCRIPT_NAME')
    upath_info = upath_property('PATH_INFO')

    def encget(self, key, default=NoDefault, encattr=None):
        """
        Return ``environ[key]`` unchanged, or ``default`` when the key is
        missing.  Raises KeyError when the key is missing and no default
        was given.  ``encattr`` is accepted but not used.
        """
        found = self.environ.get(key, default)
        if found is NoDefault:
            raise KeyError(key)
        return found
1398
class AdhocAttrMixin(object):
    """
    Mixin that stores attributes unknown to the class in
    ``environ['webob.adhoc_attrs']`` instead of on the instance.

    Attributes that exist on the class, or whose name starts with an
    underscore, are handled by the normal ``object`` machinery.
    """
    _setattr_stacklevel = 3

    def __setattr__(self, attr, value, DEFAULT=object()):
        """Set ``attr`` on the instance or in the ad-hoc environ dict."""
        if (getattr(self.__class__, attr, DEFAULT) is not DEFAULT or
                    attr.startswith('_')):
            object.__setattr__(self, attr, value)
        else:
            self.environ.setdefault('webob.adhoc_attrs', {})[attr] = value

    def __getattr__(self, attr, DEFAULT=object()):
        """
        Look ``attr`` up in the ad-hoc environ dict.

        Only called when normal lookup failed.  Raises AttributeError when
        the attribute is not stored there.
        """
        if attr == 'environ':
            # Reaching this point means the instance has no ``environ``
            # yet (e.g. created with __new__).  Reading self.environ below
            # would call __getattr__ again and recurse without end, so
            # report the missing attribute directly.
            raise AttributeError(attr)
        try:
            return self.environ['webob.adhoc_attrs'][attr]
        except KeyError:
            raise AttributeError(attr)

    def __delattr__(self, attr, DEFAULT=object()):
        """
        Delete ``attr`` from the instance (when the class defines it) or
        from the ad-hoc environ dict.  Raises AttributeError when it is
        not stored there.
        """
        if getattr(self.__class__, attr, DEFAULT) is not DEFAULT:
            return object.__delattr__(self, attr)
        try:
            del self.environ['webob.adhoc_attrs'][attr]
        except KeyError:
            raise AttributeError(attr)
1422
class Request(AdhocAttrMixin, BaseRequest):
    """
    The default request implementation: ``BaseRequest`` combined with
    ``AdhocAttrMixin``, so attributes that are not defined on the class
    are stored in ``environ['webob.adhoc_attrs']``.
    """
1425
def environ_from_url(path):
    """
    Build a minimal WSGI environ (method GET, empty body) for ``path``.

    ``path`` may be a bare path, optionally with a query string, or a
    full URL.  A bare path is served from ``http://localhost:80``.

    Raises TypeError when a full URL contains a fragment, or has no
    explicit port and a scheme other than http/https.
    """
    if SCHEME_RE.search(path):
        scheme, netloc, path, qs, fragment = urlparse.urlsplit(path)
        if fragment:
            raise TypeError("Path cannot contain a fragment (%r)" % fragment)
        if qs:
            path += '?' + qs
        if ':' not in netloc:
            # No explicit port: add the default one for the scheme.
            default_ports = {'http': ':80', 'https': ':443'}
            try:
                netloc += default_ports[scheme]
            except KeyError:
                raise TypeError("Unknown scheme: %r" % scheme)
    else:
        scheme, netloc = 'http', 'localhost:80'

    # Everything after the first '?' is the query string; only the path
    # part is unquoted.
    raw_path, _, query_string = path.partition('?')
    path_info = url_unquote(raw_path)

    host_parts = netloc.split(':')
    return {
        'REQUEST_METHOD': 'GET',
        'SCRIPT_NAME': '',
        'PATH_INFO': path_info or '',
        'QUERY_STRING': query_string,
        'SERVER_NAME': host_parts[0],
        'SERVER_PORT': host_parts[1],
        'HTTP_HOST': netloc,
        'SERVER_PROTOCOL': 'HTTP/1.0',
        'wsgi.version': (1, 0),
        'wsgi.url_scheme': scheme,
        'wsgi.input': io.BytesIO(),
        'wsgi.errors': sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': False,
        'wsgi.run_once': False,
    }
1468
1469
def environ_add_POST(env, data, content_type=None):
    """
    Install ``data`` as the request body of the WSGI environ ``env``.

    ``data`` may be None (nothing is done), bytes, text, or a mapping /
    list of pairs.  The request method is switched to POST unless it is
    already POST or PUT, and wsgi.input, CONTENT_LENGTH and CONTENT_TYPE
    are set.  When ``content_type`` is None it becomes multipart/form-data
    if a mapping value is a tuple or list (a file upload), otherwise
    application/x-www-form-urlencoded.

    Raises ValueError when files are submitted with a urlencoded content
    type, or when non-bytes data is given for any other content type.
    """
    if data is None:
        return
    if isinstance(data, text_type): # pragma: no cover
        data = data.encode('ascii')
    if env['REQUEST_METHOD'] not in ('POST', 'PUT'):
        env['REQUEST_METHOD'] = 'POST'

    has_files = False
    if hasattr(data, 'items'):
        data = list(data.items())
        has_files = any(isinstance(v, (tuple, list)) for _, v in data)

    if content_type is None:
        content_type = ('multipart/form-data' if has_files
                        else 'application/x-www-form-urlencoded')

    # Raw bytes are used as the body verbatim; anything else is encoded
    # according to the content type.
    is_raw = isinstance(data, bytes)
    if content_type.startswith('multipart/form-data'):
        if not is_raw:
            content_type, data = _encode_multipart(data, content_type)
    elif content_type.startswith('application/x-www-form-urlencoded'):
        if has_files:
            raise ValueError('Submiting files is not allowed for'
                             ' content type `%s`' % content_type)
        if not is_raw:
            data = url_encode(data)
    elif not is_raw:
        raise ValueError('Please provide `POST` data as string'
                         ' for content type `%s`' % content_type)

    body = bytes_(data, 'utf8')
    env['wsgi.input'] = io.BytesIO(body)
    env['webob.is_body_seekable'] = True
    env['CONTENT_LENGTH'] = str(len(body))
    env['CONTENT_TYPE'] = content_type
1507
1508
1509
1510#########################
1511## Helper classes and monkeypatching
1512#########################
1513
class DisconnectionError(IOError):
    """
    Raised when the request body ends before the expected number of
    bytes could be read (see ``LimitedLengthFile.readinto`` and
    ``BaseRequest._copy_body_tempfile``).
    """
    pass
1516
1517
class LimitedLengthFile(io.RawIOBase):
    """
    Raw stream wrapper that reads at most ``maxlen`` bytes from ``file``
    and raises DisconnectionError when the underlying file returns fewer
    bytes than requested while more are still expected.
    """

    def __init__(self, file, maxlen):
        self.file = file
        self.maxlen = maxlen
        # Bytes that may still be read from the underlying file.
        self.remaining = maxlen

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s(%r, maxlen=%s)>' % (class_name, self.file, self.maxlen)

    def fileno(self):
        """Delegate to the wrapped file."""
        return self.file.fileno()

    @staticmethod
    def readable():
        return True

    def readinto(self, buff):
        """
        Fill ``buff`` with up to ``self.remaining`` bytes and return the
        number of bytes stored (0 once the limit is reached).
        """
        if not self.remaining:
            return 0
        wanted = min(len(buff), self.remaining)
        chunk = self.file.read(wanted)
        received = len(chunk)
        self.remaining -= received
        # A short read while bytes are still owed means the source dried up.
        if received < wanted and self.remaining:
            raise DisconnectionError(
                "The client disconnected while sending the POST/PUT body "
                + "(%d more bytes were expected)" % self.remaining
            )
        buff[:received] = chunk
        return received
1553
1554
1555def _cgi_FieldStorage__repr__patch(self):
1556    """ monkey patch for FieldStorage.__repr__
1557
1558    Unbelievably, the default __repr__ on FieldStorage reads
1559    the entire file content instead of being sane about it.
1560    This is a simple replacement that doesn't do that
1561    """
1562    if self.file:
1563        return "FieldStorage(%r, %r)" % (self.name, self.filename)
1564    return "FieldStorage(%r, %r, %r)" % (self.name, self.filename, self.value)
1565
# Install the lightweight repr on the FieldStorage class imported from
# webob.compat (this replaces its __repr__ for every user of that class).
cgi_FieldStorage.__repr__ = _cgi_FieldStorage__repr__patch
1567
class FakeCGIBody(io.RawIOBase):
    """
    Readable raw stream that serializes ``vars`` into a request body on
    first read, as urlencoded form data or multipart/form-data depending
    on ``content_type``.
    """

    def __init__(self, vars, content_type):
        # A multipart body cannot be produced without a boundary.
        is_multipart = content_type.startswith('multipart/form-data')
        if is_multipart and not _get_multipart_boundary(content_type):
            raise ValueError('Content-type: %r does not contain boundary'
                        % content_type)
        self.vars = vars
        self.content_type = content_type
        # Encoded body; built lazily by readinto().
        self.file = None

    def __repr__(self):
        shown = repr(self.vars)
        if len(shown) > 20:
            # Keep the repr short: head ... tail.
            shown = shown[:15] + '...' + shown[-5:]
        return '<%s at 0x%x viewing %s>' % (
            self.__class__.__name__, abs(id(self)), shown)

    def fileno(self):
        return None

    @staticmethod
    def readable():
        return True

    def readinto(self, buff):
        """Encode the variables on first use, then read into ``buff``."""
        if self.file is None:
            ctype = self.content_type
            if ctype.startswith('application/x-www-form-urlencoded'):
                encoded_pairs = [
                    '%s=%s' % (quote_plus(bytes_(k, 'utf8')), quote_plus(bytes_(v, 'utf8')))
                    for k, v in self.vars.items()
                ]
                self.file = io.BytesIO(bytes_('&'.join(encoded_pairs)))
            elif ctype.startswith('multipart/form-data'):
                self.file = _encode_multipart(
                    self.vars.items(),
                    ctype,
                    fout=io.BytesIO()
                )[1]
                self.file.seek(0)
            else:
                assert 0, ('Bad content type: %r' % self.content_type)
        return self.file.readinto(buff)
1612
1613
def _get_multipart_boundary(ctype):
    """
    Return the ``boundary`` parameter of the content type ``ctype`` with
    surrounding double quotes removed, or None when there is none.

    The value is either a quoted string (which may contain spaces) or a
    token that ends at the first space or ``;``, so a parameter following
    the boundary (``boundary=abc; charset=utf-8``) is not swallowed.
    """
    m = re.search(r'boundary=("[^"]*"|[^ ;]+)', ctype, re.I)
    if m:
        return native_(m.group(1).strip('"'))
1618
1619
def _encode_multipart(vars, content_type, fout=None):
    """
    Encode ``vars`` (an iterable of ``(name, value)`` pairs) as a
    multipart body.

    A value may be plain text or bytes, a ``(filename, content)`` tuple
    or list (content may be a file-like object), or an object with
    ``filename`` / ``type`` / ``value`` attributes.  When ``content_type``
    has no boundary, a random one is generated and appended to it.

    Returns ``(content_type, fout)`` when ``fout`` was given, otherwise
    ``(content_type, encoded_bytes)``.
    """
    out = fout or io.BytesIO()
    write = out.write

    def write_text(text):
        return out.write(text.encode('utf8'))

    CRLF = b'\r\n'
    boundary = _get_multipart_boundary(content_type)
    if not boundary:
        boundary = native_(binascii.hexlify(os.urandom(10)))
        content_type += ('; boundary=%s' % boundary)

    for name, value in vars:
        # Part delimiter line.
        write(b'--')
        write_text(boundary)
        write(CRLF)
        assert name is not None, 'Value associated with no name: %r' % value
        write_text('Content-Disposition: form-data; name="%s"' % name)

        filename = None
        if getattr(value, 'filename', None):
            filename = value.filename
        elif isinstance(value, (list, tuple)):
            filename, value = value
            if hasattr(value, 'read'):
                value = value.read()

        mime_type = None
        if filename is not None:
            write_text('; filename="%s"' % filename)
            mime_type = mimetypes.guess_type(filename)[0]

        write(CRLF)

        # TODO: should handle value.disposition_options
        if getattr(value, 'type', None):
            # The value's own content type wins over the guessed one.
            write_text('Content-type: %s' % value.type)
            if value.type_options:
                for opt_name, opt_value in sorted(value.type_options.items()):
                    write_text('; %s="%s"' % (opt_name, opt_value))
            write(CRLF)
        elif mime_type:
            write_text('Content-type: %s' % mime_type)
            write(CRLF)
        # Blank line separating the part headers from the content.
        write(CRLF)

        if hasattr(value, 'value'):
            value = value.value
        if isinstance(value, bytes):
            write(value)
        else:
            write_text(value)
        write(CRLF)

    # Closing delimiter.
    write_text('--%s--' % boundary)
    if fout:
        return content_type, fout
    return content_type, out.getvalue()
1675
def detect_charset(ctype):
    """
    Return the charset found in ``ctype`` by ``CHARSET_RE``, with double
    quotes and surrounding whitespace stripped, or None when the pattern
    does not match.
    """
    match = CHARSET_RE.search(ctype)
    if match is None:
        return None
    return match.group(1).strip('"').strip()
1680
1681def _is_utf8(charset):
1682    if not charset:
1683        return True
1684    else:
1685        return charset.lower().replace('-', '') == 'utf8'
1686
1687
class Transcoder(object):
    """
    Helper that re-encodes request data from ``charset`` to UTF-8.
    """

    def __init__(self, charset, errors='strict'):
        self.charset = charset # source charset
        self.errors = errors # unicode errors

        def recode(raw):
            # bytes in the source charset -> bytes in UTF-8
            return raw.decode(charset, errors).encode('utf8')
        self._trans = recode

    def transcode_query(self, q):
        """
        Return the query string ``q`` re-encoded as UTF-8.  A string
        without any ``=`` is returned unchanged.
        """
        if '=' not in q:
            # this doesn't look like a form submission
            return q
        if PY3: # pragma: no cover
            pairs = list(parse_qsl_text(q, self.charset))
        else:
            # NOTE(review): the second positional argument of the Python 2
            # ``urlparse.parse_qsl`` is ``keep_blank_values``, so the
            # charset appears to act as a truthy flag here -- confirm.
            recode = self._trans
            pairs = [
                (recode(k), recode(v))
                for k, v in urlparse.parse_qsl(q, self.charset)
            ]
        return url_encode(pairs)

    def transcode_fs(self, fs, content_type):
        """
        Re-encode the fields of the FieldStorage ``fs`` as a multipart
        body and return the BytesIO it was written to.  Field names and
        filenames are decoded in place (a no-op on Python 3).
        """
        if PY3: # pragma: no cover
            def decode(raw):
                return raw
        else:
            def decode(raw):
                return raw.decode(self.charset, self.errors)

        fields = []
        for field in fs.list or ():
            field.name = decode(field.name)
            if field.filename:
                # File upload: pass the field object itself through.
                field.filename = decode(field.filename)
                fields.append((field.name, field))
            else:
                fields.append((field.name, decode(field.value)))

        # TODO: transcode big requests to temp file
        return _encode_multipart(
            fields,
            content_type,
            fout=io.BytesIO()
        )[1]
1734
# TODO: remove in 1.4
# Attach ``str_GET``, ``str_POST``, ``str_params`` and ``str_cookies`` to
# BaseRequest as deprecated properties whose message points at the
# un-prefixed attribute.  NOTE(review): how deprecated_property reacts on
# access (warning vs. exception) is not visible here -- see
# webob.descriptors.
for _name in 'GET POST params cookies'.split():
    _str_name = 'str_'+_name
    _prop = deprecated_property(
        None, _str_name,
        "disabled starting WebOb 1.2, use %s instead" % _name, '1.2')
    setattr(BaseRequest, _str_name, _prop)
1742