1 from __future__ import absolute_import
2 from contextlib import contextmanager
5 from socket import timeout as SocketTimeout
6 from socket import error as SocketError
8 from ._collections import HTTPHeaderDict
9 from .exceptions import (
10 ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked
12 from .packages.six import string_types as basestring, binary_type, PY3
13 from .packages.six.moves import http_client as httplib
14 from .connection import HTTPException, BaseSSLError
15 from .util.response import is_fp_closed, is_response_to_head
18 class DeflateDecoder(object):
21 self._first_try = True
22 self._data = binary_type()
23 self._obj = zlib.decompressobj()
25 def __getattr__(self, name):
26 return getattr(self._obj, name)
28 def decompress(self, data):
32 if not self._first_try:
33 return self._obj.decompress(data)
37 return self._obj.decompress(data)
39 self._first_try = False
40 self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
42 return self.decompress(self._data)
47 class GzipDecoder(object):
50 self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
52 def __getattr__(self, name):
53 return getattr(self._obj, name)
55 def decompress(self, data):
58 return self._obj.decompress(data)
61 def _get_decoder(mode):
65 return DeflateDecoder()
68 class HTTPResponse(io.IOBase):
70 HTTP Response container.
72 Backwards-compatible with httplib's HTTPResponse but the response ``body`` is
73 loaded and decoded on-demand when the ``data`` property is accessed. This
74 class is also compatible with the Python standard library's :mod:`io`
75 module, and can hence be treated as a readable object in the context of that
78 Extra parameters for behaviour not present in httplib.HTTPResponse:
80 :param preload_content:
81 If True, the response's body will be preloaded during construction.
83 :param decode_content:
84 If True, the body is decoded according to the content-encoding header
85 (like 'gzip' and 'deflate'); if False, decoding is skipped and raw data is used
88 :param original_response:
89 When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
90 object, it's convenient to include the original for debug purposes. It's
94 CONTENT_DECODERS = ['gzip', 'deflate']
95 REDIRECT_STATUSES = [301, 302, 303, 307, 308]
97 def __init__(self, body='', headers=None, status=0, version=0, reason=None,
98 strict=0, preload_content=True, decode_content=True,
99 original_response=None, pool=None, connection=None):
101 if isinstance(headers, HTTPHeaderDict):
102 self.headers = headers
104 self.headers = HTTPHeaderDict(headers)
106 self.version = version
109 self.decode_content = decode_content
114 self._original_response = original_response
115 self._fp_bytes_read = 0
117 if body and isinstance(body, (basestring, binary_type)):
121 self._connection = connection
123 if hasattr(body, 'read'):
126 # Are we using the chunked-style of transfer encoding?
128 self.chunk_left = None
129 tr_enc = self.headers.get('transfer-encoding', '').lower()
130 # Don't incur the penalty of creating a list and then discarding it
131 encodings = (enc.strip() for enc in tr_enc.split(","))
132 if "chunked" in encodings:
135 # If requested, preload the body.
136 if preload_content and not self._body:
137 self._body = self.read(decode_content=decode_content)
139 def get_redirect_location(self):
141 Should we redirect and where to?
143 :returns: Truthy redirect location string if we got a redirect status
144 code and valid location. ``None`` if redirect status and no
145 location. ``False`` if not a redirect status code.
147 if self.status in self.REDIRECT_STATUSES:
148 return self.headers.get('location')
152 def release_conn(self):
153 if not self._pool or not self._connection:
156 self._pool._put_conn(self._connection)
157 self._connection = None
161 # For backwards-compat with urllib3 0.4 and earlier.
166 return self.read(cache_content=True)
170 Obtain the number of bytes pulled over the wire so far. May differ from
171 the amount of content returned by :meth:`HTTPResponse.read` if bytes
172 are encoded on the wire (e.g., compressed).
174 return self._fp_bytes_read
def _init_decoder(self):
    """
    Set up the ``_decoder`` attribute if necessary.

    A decoder is created only when none has been set yet and the response's
    content-encoding header names one of ``CONTENT_DECODERS``; otherwise
    ``_decoder`` is left as it is.
    """
    # Content-encoding values are case-insensitive per RFC 7230, so the
    # header value is lower-cased before it is compared.
    encoding = self.headers.get('content-encoding', '').lower()
    if self._decoder is not None:
        return
    if encoding in self.CONTENT_DECODERS:
        self._decoder = _get_decoder(encoding)
186 def _decode(self, data, decode_content, flush_decoder):
188 Decode the data passed in and potentially flush the decoder.
191 if decode_content and self._decoder:
192 data = self._decoder.decompress(data)
193 except (IOError, zlib.error) as e:
194 content_encoding = self.headers.get('content-encoding', '').lower()
196 "Received response with content-encoding: %s, but "
197 "failed to decode it." % content_encoding, e)
199 if flush_decoder and decode_content:
200 data += self._flush_decoder()
204 def _flush_decoder(self):
206 Flushes the decoder. Should only be called if the decoder is actually
210 buf = self._decoder.decompress(b'')
211 return buf + self._decoder.flush()
216 def _error_catcher(self):
218 Catch low-level python exceptions, instead re-raising urllib3
219 variants, so that low-level exceptions are not leaked in the
222 On exit, release the connection back to the pool.
230 except SocketTimeout:
231 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
232 # there is yet no clean way to get at it from this context.
233 raise ReadTimeoutError(self._pool, None, 'Read timed out.')
235 except BaseSSLError as e:
236 # FIXME: Is there a better way to differentiate between SSLErrors?
237 if 'read operation timed out' not in str(e): # Defensive:
238 # This shouldn't happen but just in case we're missing an edge
239 # case, let's avoid swallowing SSL errors.
242 raise ReadTimeoutError(self._pool, None, 'Read timed out.')
244 except (HTTPException, SocketError) as e:
245 # This includes IncompleteRead.
246 raise ProtocolError('Connection broken: %r' % e, e)
248 # If no exception is thrown, we should avoid cleaning up
252 # If we didn't terminate cleanly, we need to throw away our
255 # The response may not be closed but we're not going to use it
256 # anymore so close it now to ensure that the connection is
257 # released back to the pool.
258 if self._original_response:
259 self._original_response.close()
261 # Closing the response may not actually be sufficient to close
262 # everything, so if we have a hold of the connection close that
265 self._connection.close()
267 # If we hold the original response but it's closed now, we should
268 # return the connection back to the pool.
269 if self._original_response and self._original_response.isclosed():
272 def read(self, amt=None, decode_content=None, cache_content=False):
274 Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
275 parameters: ``decode_content`` and ``cache_content``.
278 How much of the content to read. If specified, caching is skipped
279 because it doesn't make sense to cache partial content as the full
282 :param decode_content:
283 If True, will attempt to decode the body based on the
284 'content-encoding' header.
286 :param cache_content:
287 If True, will save the returned data such that the same result is
288 returned regardless of the state of the underlying file object. This
289 is useful if you want the ``.data`` property to continue working
290 after having ``.read()`` the file object. (Overridden if ``amt`` is
294 if decode_content is None:
295 decode_content = self.decode_content
300 flush_decoder = False
303 with self._error_catcher():
305 # cStringIO doesn't like amt=None
306 data = self._fp.read()
309 cache_content = False
310 data = self._fp.read(amt)
311 if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
312 # Close the connection when no data is returned
314 # This is redundant to what httplib/http.client _should_
315 # already do. However, versions of python released before
316 # December 15, 2012 (http://bugs.python.org/issue16298) do
317 # not properly close the connection in all cases. There is
318 # no harm in redundantly calling close.
323 self._fp_bytes_read += len(data)
325 data = self._decode(data, decode_content, flush_decoder)
332 def stream(self, amt=2**16, decode_content=None):
334 A generator wrapper for the read() method. A call will block until
335 ``amt`` bytes have been read from the connection or until the
336 connection is closed.
339 How much of the content to read. The generator will return up to
340 this much data per iteration, but may return less. This is particularly
341 likely when using compressed data. However, the empty string will
344 :param decode_content:
345 If True, will attempt to decode the body based on the
346 'content-encoding' header.
349 for line in self.read_chunked(amt, decode_content=decode_content):
352 while not is_fp_closed(self._fp):
353 data = self.read(amt=amt, decode_content=decode_content)
359 def from_httplib(ResponseCls, r, **response_kw):
361 Given an :class:`httplib.HTTPResponse` instance ``r``, return a
362 corresponding :class:`urllib3.response.HTTPResponse` object.
364 Remaining parameters are passed to the HTTPResponse constructor, along
365 with ``original_response=r``.
369 if not isinstance(headers, HTTPHeaderDict):
371 headers = HTTPHeaderDict(headers.items())
373 headers = HTTPHeaderDict.from_httplib(headers)
375 # HTTPResponse objects in Python 3 don't have a .strict attribute
376 strict = getattr(r, 'strict', 0)
377 resp = ResponseCls(body=r,
387 # Backwards-compatibility methods for httplib.HTTPResponse
388 def getheaders(self):
def getheader(self, name, default=None):
    """
    Look up a single response header.

    Backwards-compatibility method for httplib.HTTPResponse; the lookup is
    delegated to ``self.headers.get``.

    :param name:
        Name of the header to look up.

    :param default:
        Value handed to ``self.headers.get`` as the fallback for a missing
        header.

    :returns: Whatever ``self.headers.get(name, default)`` returns.
    """
    header_value = self.headers.get(name, default)
    return header_value
394 # Overrides from io.IOBase
400 self._connection.close()
406 elif hasattr(self._fp, 'closed'):
407 return self._fp.closed
408 elif hasattr(self._fp, 'isclosed'): # Python 2
409 return self._fp.isclosed()
415 raise IOError("HTTPResponse has no file to get a fileno from")
416 elif hasattr(self._fp, "fileno"):
417 return self._fp.fileno()
419 raise IOError("The file-like object this HTTPResponse is wrapped "
420 "around has no file descriptor")
423 if self._fp is not None and hasattr(self._fp, 'flush'):
424 return self._fp.flush()
427 # This method is required for `io` module compatibility.
430 def readinto(self, b):
431 # This method is required for `io` module compatibility.
432 temp = self.read(len(b))
439 def _update_chunk_length(self):
440 # First, we'll figure out length of a chunk and then
441 # we'll try to read it from socket.
442 if self.chunk_left is not None:
444 line = self._fp.fp.readline()
445 line = line.split(b';', 1)[0]
447 self.chunk_left = int(line, 16)
449 # Invalid chunked protocol response, abort.
451 raise httplib.IncompleteRead(line)
453 def _handle_chunk(self, amt):
454 returned_chunk = None
456 chunk = self._fp._safe_read(self.chunk_left)
457 returned_chunk = chunk
458 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
459 self.chunk_left = None
460 elif amt < self.chunk_left:
461 value = self._fp._safe_read(amt)
462 self.chunk_left = self.chunk_left - amt
463 returned_chunk = value
464 elif amt == self.chunk_left:
465 value = self._fp._safe_read(amt)
466 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
467 self.chunk_left = None
468 returned_chunk = value
469 else: # amt > self.chunk_left
470 returned_chunk = self._fp._safe_read(self.chunk_left)
471 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
472 self.chunk_left = None
473 return returned_chunk
475 def read_chunked(self, amt=None, decode_content=None):
477 Similar to :meth:`HTTPResponse.read`, but with an additional
478 parameter: ``decode_content``.
480 :param decode_content:
481 If True, will attempt to decode the body based on the
482 'content-encoding' header.
485 # FIXME: Rewrite this method and make it a class with a better structured logic.
487 raise ResponseNotChunked(
488 "Response is not chunked. "
489 "Header 'transfer-encoding: chunked' is missing.")
491 # Don't bother reading the body of a HEAD request.
492 if self._original_response and is_response_to_head(self._original_response):
493 self._original_response.close()
496 with self._error_catcher():
498 self._update_chunk_length()
499 if self.chunk_left == 0:
501 chunk = self._handle_chunk(amt)
502 decoded = self._decode(chunk, decode_content=decode_content,
508 # On CPython and PyPy, we should never need to flush the
509 # decoder. However, on Jython we *might* need to, so
510 # let's defensively do it anyway.
511 decoded = self._flush_decoder()
512 if decoded: # Platform-specific: Jython.
515 # Chunk content ends with \r\n: discard it.
517 line = self._fp.fp.readline()
519 # Some sites may not end with '\r\n'.
524 # We read everything; close the "file".
525 if self._original_response:
526 self._original_response.close()