from __future__ import absolute_import
import errno
import logging
import socket
import sys
import warnings

from socket import error as SocketError, timeout as SocketTimeout

try:  # Python 3
    from queue import LifoQueue, Empty, Full
except ImportError:  # Python 2
    from Queue import LifoQueue, Empty, Full
    # Queue is imported for side effects on MS Windows
    import Queue as _unused_module_Queue  # noqa: unused

from .exceptions import (
    ClosedPoolError, ProtocolError, EmptyPoolError, HeaderParsingError,
    HostChangedError, LocationValueError, MaxRetryError, NewConnectionError,
    ProxyError, ReadTimeoutError, SSLError, TimeoutError,
    InsecureRequestWarning,
)
from .packages.ssl_match_hostname import CertificateError
from .packages import six
from .connection import (
    port_by_scheme, DummyConnection,
    HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
    HTTPException, BaseSSLError,
)
from .request import RequestMethods
from .response import HTTPResponse
from .util.connection import is_connection_dropped
from .util.response import assert_header_parsing
from .util.retry import Retry
from .util.timeout import Timeout
from .util.url import get_host, Url


xrange = six.moves.xrange

log = logging.getLogger(__name__)

_Default = object()


class ConnectionPool(object):
    """
    Base class for all connection pools, such as
    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.
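
    Example (an illustrative sketch, not part of the original docstring;
    assumes a reachable host and uses the :class:`.HTTPConnectionPool`
    subclass defined below)::

        >>> with HTTPConnectionPool('httpbin.org', maxsize=2) as pool:
        ...     response = pool.request('GET', '/get')
        >>> # Leaving the ``with`` block closes all pooled connections.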
    """

    scheme = None
    QueueCls = LifoQueue

    def __init__(self, host, port=None):
        if not host:
            raise LocationValueError("No host specified.")

        # httplib doesn't like it when we include brackets in IPv6 addresses.
        # Specifically, if we include brackets but also pass the port, then
        # httplib crazily doubles up the square brackets on the Host header.
        # Instead, we need to make sure we never pass ``None`` as the port.
        # However, for backward compatibility reasons we can't actually
        # remove the port from the host parameter.
        self.host = host.strip('[]')
        self.port = port

    def __str__(self):
        return '%s(host=%r, port=%r)' % (type(self).__name__,
                                         self.host, self.port)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        # Return False to re-raise any potential exceptions
        return False

    def close(self):
        """
        Close all pooled connections and disable the pool.
        """
        pass


# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])


class HTTPConnectionPool(ConnectionPool, RequestMethods):
    """
    Thread-safe connection pool for one host.

    :param host:
        Host used for this HTTP Connection (e.g. "localhost"), passed into
        :class:`httplib.HTTPConnection`.

    :param port:
        Port used for this HTTP Connection (None is equivalent to 80), passed
        into :class:`httplib.HTTPConnection`.

    :param strict:
        Causes BadStatusLine to be raised if the status line can't be parsed
        as a valid HTTP/1.0 or 1.1 status line, passed into
        :class:`httplib.HTTPConnection`.

        .. note::
           Only works in Python 2. This parameter is ignored in Python 3.

    :param timeout:
        Socket timeout in seconds for each individual connection. This can
        be a float or integer, which sets the timeout for the HTTP request,
        or an instance of :class:`urllib3.util.Timeout`, which gives you more
        fine-grained control over request timeouts. Once the constructor has
        run, this is always a :class:`urllib3.util.Timeout` object.

    :param maxsize:
        Number of connections to save that can be reused. More than 1 is useful
        in multithreaded situations. If ``block`` is set to False, more
        connections will be created, but they will not be saved once they've
        been used.

    :param block:
        If set to True, no more than ``maxsize`` connections will be used at
        a time. When no free connections are available, the call will block
        until a connection has been released. This is a useful side effect for
        particular multithreaded situations where one does not want to use more
        than ``maxsize`` connections per host to prevent flooding.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param retries:
        Retry configuration to use by default with requests in this pool.

    :param _proxy:
        Parsed proxy URL; should not be used directly. Instead, see
        :class:`urllib3.poolmanager.ProxyManager`.

    :param _proxy_headers:
        A dictionary with proxy headers; should not be used directly. Instead,
        see :class:`urllib3.poolmanager.ProxyManager`.

    :param \**conn_kw:
        Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
        :class:`urllib3.connection.HTTPSConnection` instances.
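
    Example (an illustrative sketch, not part of the original docstring;
    assumes a reachable host)::

        >>> from urllib3 import HTTPConnectionPool
        >>> from urllib3.util import Timeout, Retry
        >>> pool = HTTPConnectionPool('httpbin.org', maxsize=4, block=True,
        ...                           timeout=Timeout(connect=2.0, read=5.0),
        ...                           retries=Retry(total=3, redirect=2))
        >>> r = pool.request('GET', '/get')
        >>> r.status
        200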
    """

    scheme = 'http'
    ConnectionCls = HTTPConnection

    def __init__(self, host, port=None, strict=False,
                 timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
                 headers=None, retries=None,
                 _proxy=None, _proxy_headers=None,
                 **conn_kw):
        ConnectionPool.__init__(self, host, port)
        RequestMethods.__init__(self, headers)

        self.strict = strict

        if not isinstance(timeout, Timeout):
            timeout = Timeout.from_float(timeout)

        if retries is None:
            retries = Retry.DEFAULT

        self.timeout = timeout
        self.retries = retries

        self.pool = self.QueueCls(maxsize)
        self.block = block

        self.proxy = _proxy
        self.proxy_headers = _proxy_headers or {}

        # Fill the queue up so that doing get() on it will block properly
        for _ in xrange(maxsize):
            self.pool.put(None)

        # These are mostly for testing and debugging purposes.
        self.num_connections = 0
        self.num_requests = 0
        self.conn_kw = conn_kw

        if self.proxy:
            # Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
            # We cannot know if the user has added default socket options, so we
            # cannot replace the list.
            self.conn_kw.setdefault('socket_options', [])

    def _new_conn(self):
        """
        Return a fresh :class:`HTTPConnection`.
        """
        self.num_connections += 1
        log.info("Starting new HTTP connection (%d): %s",
                 self.num_connections, self.host)

        conn = self.ConnectionCls(host=self.host, port=self.port,
                                  timeout=self.timeout.connect_timeout,
                                  strict=self.strict, **self.conn_kw)
        return conn

    def _get_conn(self, timeout=None):
        """
        Get a connection. Will return a pooled connection if one is available.

        If no connections are available and :attr:`.block` is ``False``, then a
        fresh connection is returned.

        :param timeout:
            Seconds to wait before giving up and raising
            :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
            :attr:`.block` is ``True``.
        """
        conn = None
        try:
            conn = self.pool.get(block=self.block, timeout=timeout)

        except AttributeError:  # self.pool is None
            raise ClosedPoolError(self, "Pool is closed.")

        except Empty:
            if self.block:
                raise EmptyPoolError(self,
                                     "Pool reached maximum size and no more "
                                     "connections are allowed.")
            pass  # Oh well, we'll create a new connection then

        # If this is a persistent connection, check if it got disconnected
        if conn and is_connection_dropped(conn):
            log.info("Resetting dropped connection: %s", self.host)
            conn.close()
            if getattr(conn, 'auto_open', 1) == 0:
                # This is a proxied connection that has been mutated by
                # httplib._tunnel() and cannot be reused (since it would
                # attempt to bypass the proxy)
                conn = None

        return conn or self._new_conn()

    def _put_conn(self, conn):
        """
        Put a connection back into the pool.

        :param conn:
            Connection object for the current host and port as returned by
            :meth:`._new_conn` or :meth:`._get_conn`.

        If the pool is already full, the connection is closed and discarded
        because we exceeded maxsize. If connections are discarded frequently,
        then maxsize should be increased.

        If the pool is closed, then the connection will be closed and discarded.
        """
        try:
            self.pool.put(conn, block=False)
            return  # Everything is dandy, done.
        except AttributeError:
            # self.pool is None.
            pass
        except Full:
            # This should never happen if self.block == True
            log.warning(
                "Connection pool is full, discarding connection: %s",
                self.host)

        # Connection never got put back into the pool, close it.
        if conn:
            conn.close()

    def _validate_conn(self, conn):
        """
        Called right before a request is made, after the socket is created.
        """
        pass

    def _prepare_proxy(self, conn):
        # Nothing to do for HTTP connections.
        pass

    def _get_timeout(self, timeout):
        """ Helper that always returns a :class:`urllib3.util.Timeout` """
        if timeout is _Default:
            return self.timeout.clone()

        if isinstance(timeout, Timeout):
            return timeout.clone()
        else:
            # User passed us an int/float. This is for backwards compatibility,
            # can be removed later
            return Timeout.from_float(timeout)

    def _raise_timeout(self, err, url, timeout_value):
        """Is the error actually a timeout? Will raise a ReadTimeout or pass"""

        if isinstance(err, SocketTimeout):
            raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)

        # See the above comment about EAGAIN in Python 3. In Python 2 we have
        # to specifically catch it and throw the timeout error
        if hasattr(err, 'errno') and err.errno in _blocking_errnos:
            raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)

        # Catch possible read timeouts thrown as SSL errors. If not the
        # case, rethrow the original. We need to do this because of:
        # http://bugs.python.org/issue10272
        if 'timed out' in str(err) or 'did not complete (read)' in str(err):  # Python 2.6
            raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)

    def _make_request(self, conn, method, url, timeout=_Default, chunked=False,
                      **httplib_request_kw):
        """
        Perform a request on a given urllib connection object taken from our
        pool.

        :param conn:
            a connection from one of our connection pools

        :param timeout:
            Socket timeout in seconds for the request. This can be a
            float or integer, which will set the same timeout value for
            the socket connect and the socket read, or an instance of
            :class:`urllib3.util.Timeout`, which gives you more fine-grained
            control over your timeouts.
        """
        self.num_requests += 1

        timeout_obj = self._get_timeout(timeout)
        timeout_obj.start_connect()
        conn.timeout = timeout_obj.connect_timeout

        # Trigger any extra validation we need to do.
        try:
            self._validate_conn(conn)
        except (SocketTimeout, BaseSSLError) as e:
            # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
            self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
            raise

        # conn.request() calls httplib.*.request, not the method in
        # urllib3.request. It also calls makefile (recv) on the socket.
        if chunked:
            conn.request_chunked(method, url, **httplib_request_kw)
        else:
            conn.request(method, url, **httplib_request_kw)

        # Reset the timeout for the recv() on the socket
        read_timeout = timeout_obj.read_timeout

        # App Engine doesn't have a sock attr
        if getattr(conn, 'sock', None):
            # In Python 3 socket.py will catch EAGAIN and return None when you
            # try and read into the file pointer created by http.client, which
            # instead raises a BadStatusLine exception. Instead of catching
            # the exception and assuming all BadStatusLine exceptions are read
            # timeouts, check for a zero timeout before making the request.
            if read_timeout == 0:
                raise ReadTimeoutError(
                    self, url, "Read timed out. (read timeout=%s)" % read_timeout)
            if read_timeout is Timeout.DEFAULT_TIMEOUT:
                conn.sock.settimeout(socket.getdefaulttimeout())
            else:  # None or a value
                conn.sock.settimeout(read_timeout)

        # Receive the response from the server
        try:
            try:  # Python 2.7, use buffering of HTTP responses
                httplib_response = conn.getresponse(buffering=True)
            except TypeError:  # Python 2.6 and older
                httplib_response = conn.getresponse()
        except (SocketTimeout, BaseSSLError, SocketError) as e:
            self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
            raise

        # AppEngine doesn't have a version attr.
        http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
        log.debug("\"%s %s %s\" %s %s", method, url, http_version,
                  httplib_response.status, httplib_response.length)

        try:
            assert_header_parsing(httplib_response.msg)
        except HeaderParsingError as hpe:  # Platform-specific: Python 3
            log.warning(
                'Failed to parse headers (url=%s): %s',
                self._absolute_url(url), hpe, exc_info=True)

        return httplib_response

    def _absolute_url(self, path):
        return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url

    def close(self):
        """
        Close all pooled connections and disable the pool.
        """
        # Disable access to the pool
        old_pool, self.pool = self.pool, None

        try:
            while True:
                conn = old_pool.get(block=False)
                if conn:
                    conn.close()

        except Empty:
            pass  # Done.

    def is_same_host(self, url):
        """
        Check if the given ``url`` is a member of the same host as this
        connection pool.
        """
        if url.startswith('/'):
            return True

        # TODO: Add optional support for socket.gethostbyname checking.
        scheme, host, port = get_host(url)

        # Use explicit default port for comparison when none is given
        if self.port and not port:
            port = port_by_scheme.get(scheme)
        elif not self.port and port == port_by_scheme.get(scheme):
            port = None

        return (scheme, host, port) == (self.scheme, self.host, self.port)

    def urlopen(self, method, url, body=None, headers=None, retries=None,
                redirect=True, assert_same_host=True, timeout=_Default,
                pool_timeout=None, release_conn=None, chunked=False,
                **response_kw):
        """
        Get a connection from the pool and perform an HTTP request. This is the
        lowest level call for making a request, so you'll need to specify all
        the raw details.

        .. note::

           More commonly, it's appropriate to use a convenience method provided
           by :class:`.RequestMethods`, such as :meth:`request`.

        .. note::

           `release_conn` will only behave as expected if
           `preload_content=False` because we want to make
           `preload_content=False` the default behaviour someday soon without
           breaking backwards compatibility.

        :param method:
            HTTP request method (such as GET, POST, PUT, etc.)

        :param body:
            Data to send in the request body (useful for creating
            POST requests; see the convenience methods on
            :class:`.RequestMethods` for simpler interfaces).

        :param headers:
            Dictionary of custom headers to send, such as User-Agent,
            If-None-Match, etc. If None, pool headers are used. If provided,
            these headers completely replace any pool-specific headers.

        :param retries:
            Configure the number of retries to allow before raising a
            :class:`~urllib3.exceptions.MaxRetryError` exception.

            Pass ``None`` to retry until you receive a response. Pass a
            :class:`~urllib3.util.retry.Retry` object for fine-grained control
            over different types of retries.
            Pass an integer number to retry connection errors that many times,
            but no other types of errors. Pass zero to never retry.

            If ``False``, then retries are disabled and any exception is raised
            immediately. Also, instead of raising a MaxRetryError on redirects,
            the redirect response will be returned.

        :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.

        :param redirect:
            If True, automatically handle redirects (status codes 301, 302,
            303, 307, 308). Each redirect counts as a retry. Disabling retries
            will disable redirect, too.

        :param assert_same_host:
            If ``True``, will make sure that the host of the pool requests is
            consistent, and will raise HostChangedError otherwise. When False,
            you can use the pool on an HTTP proxy and request foreign hosts.

        :param timeout:
            If specified, overrides the default timeout for this one
            request. It may be a float (in seconds) or an instance of
            :class:`urllib3.util.Timeout`.

        :param pool_timeout:
            If set and the pool is set to block=True, then this method will
            block for ``pool_timeout`` seconds and raise EmptyPoolError if no
            connection is available within the time period.

        :param release_conn:
            If False, then the urlopen call will not release the connection
            back into the pool once a response is received (but will release if
            you read the entire contents of the response such as when
            `preload_content=True`). This is useful if you're not preloading
            the response's content immediately. You will need to call
            ``r.release_conn()`` on the response ``r`` to return the connection
            back into the pool. If None, it takes the value of
            ``response_kw.get('preload_content', True)``.

        :param chunked:
            If True, urllib3 will send the body using chunked transfer
            encoding. Otherwise, urllib3 will send the body using the standard
            content-length form. Defaults to False.

        :param \**response_kw:
            Additional parameters are passed to
            :meth:`urllib3.response.HTTPResponse.from_httplib`
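
        Example (an illustrative sketch, not part of the original docstring;
        assumes a reachable host and that ``Retry`` has been imported from
        :mod:`urllib3.util`)::

            >>> pool = HTTPConnectionPool('httpbin.org', maxsize=1)
            >>> r = pool.urlopen('GET', '/redirect/1',
            ...                  retries=Retry(total=3, redirect=2),
            ...                  preload_content=False)
            >>> r.status
            200
            >>> data = r.read()
            >>> r.release_conn()  # hand the connection back to the pool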
        """
        if headers is None:
            headers = self.headers

        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect, default=self.retries)

        if release_conn is None:
            release_conn = response_kw.get('preload_content', True)

        # Check host
        if assert_same_host and not self.is_same_host(url):
            raise HostChangedError(self, url, retries)

        conn = None

        # Merge the proxy headers. Only do this in HTTP. We have to copy the
        # headers dict so we can safely change it without those changes being
        # reflected in anyone else's copy.
        if self.scheme == 'http':
            headers = headers.copy()
            headers.update(self.proxy_headers)

        # Must keep the exception bound to a separate variable or else Python 3
        # complains about UnboundLocalError.
        err = None

        # Keep track of whether we cleanly exited the except block. This
        # ensures we do proper cleanup in finally.
        clean_exit = False

        try:
            # Request a connection from the queue.
            timeout_obj = self._get_timeout(timeout)
            conn = self._get_conn(timeout=pool_timeout)

            conn.timeout = timeout_obj.connect_timeout

            is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None)
            if is_new_proxy_conn:
                self._prepare_proxy(conn)

            # Make the request on the httplib connection object.
            httplib_response = self._make_request(conn, method, url,
                                                  timeout=timeout_obj,
                                                  body=body, headers=headers,
                                                  chunked=chunked)

            # If we're going to release the connection in ``finally:``, then
            # the response doesn't need to know about the connection. Otherwise
            # it will also try to release it and we'll have a double-release
            # mess.
            response_conn = conn if not release_conn else None

            # Import httplib's response into our own wrapper object
            response = HTTPResponse.from_httplib(httplib_response,
                                                 pool=self,
                                                 connection=response_conn,
                                                 **response_kw)

            # Everything went great!
            clean_exit = True

        except Empty:
            # Timed out by queue.
            raise EmptyPoolError(self, "No pool connections are available.")

        except (BaseSSLError, CertificateError) as e:
            # Close the connection. If a connection is reused on which there
            # was a Certificate error, the next request will certainly raise
            # another Certificate error.
            clean_exit = False
            raise SSLError(e)

        except SSLError:
            # Treat SSLError separately from BaseSSLError to preserve
            # behavior.
            clean_exit = False
            raise

        except (TimeoutError, HTTPException, SocketError, ProtocolError) as e:
            # Discard the connection for these exceptions. It will
            # be replaced during the next _get_conn() call.
            clean_exit = False

            if isinstance(e, (SocketError, NewConnectionError)) and self.proxy:
                e = ProxyError('Cannot connect to proxy.', e)
            elif isinstance(e, (SocketError, HTTPException)):
                e = ProtocolError('Connection aborted.', e)

            retries = retries.increment(method, url, error=e, _pool=self,
                                        _stacktrace=sys.exc_info()[2])
            retries.sleep()

            # Keep track of the error for the retry warning.
            err = e

        finally:
            if not clean_exit:
                # We hit some kind of exception, handled or otherwise. We need
                # to throw the connection away unless explicitly told not to.
                # Close the connection, set the variable to None, and make sure
                # we put the None back in the pool to avoid leaking it.
                conn = conn and conn.close()
                release_conn = True

            if release_conn:
                # Put the connection back to be reused. If the connection is
                # expired then it will be None, which will get replaced with a
                # fresh connection during _get_conn.
                self._put_conn(conn)

        if not conn:
            # Try again
            log.warning("Retrying (%r) after connection "
                        "broken by '%r': %s", retries, err, url)
            return self.urlopen(method, url, body, headers, retries,
                                redirect, assert_same_host,
                                timeout=timeout, pool_timeout=pool_timeout,
                                release_conn=release_conn, **response_kw)

        # Handle redirect?
        redirect_location = redirect and response.get_redirect_location()
        if redirect_location:
            if response.status == 303:
                method = 'GET'

            try:
                retries = retries.increment(method, url, response=response, _pool=self)
            except MaxRetryError:
                if retries.raise_on_redirect:
                    # Release the connection for this response, since we're not
                    # returning it to be released manually.
                    response.release_conn()
                    raise
                return response

            log.info("Redirecting %s -> %s", url, redirect_location)
            return self.urlopen(
                method, redirect_location, body, headers,
                retries=retries, redirect=redirect,
                assert_same_host=assert_same_host,
                timeout=timeout, pool_timeout=pool_timeout,
                release_conn=release_conn, **response_kw)

        # Check if we should retry the HTTP response.
        if retries.is_forced_retry(method, status_code=response.status):
            try:
                retries = retries.increment(method, url, response=response, _pool=self)
            except MaxRetryError:
                if retries.raise_on_status:
                    # Release the connection for this response, since we're not
                    # returning it to be released manually.
                    response.release_conn()
                    raise
                return response
            retries.sleep()
            log.info("Forced retry: %s", url)
            return self.urlopen(
                method, url, body, headers,
                retries=retries, redirect=redirect,
                assert_same_host=assert_same_host,
                timeout=timeout, pool_timeout=pool_timeout,
                release_conn=release_conn, **response_kw)

        return response


class HTTPSConnectionPool(HTTPConnectionPool):
    """
    Same as :class:`.HTTPConnectionPool`, but HTTPS.

    When Python is compiled with the :mod:`ssl` module, then
    :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
    instead of :class:`.HTTPSConnection`.

    :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
    ``assert_hostname``, and ``host`` in this order to verify connections.
    If ``assert_hostname`` is False, no verification is done.

    The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``,
    ``ca_cert_dir``, and ``ssl_version`` are only used if :mod:`ssl` is
    available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade
    the connection socket into an SSL socket.
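
    Example (an illustrative sketch, not part of the original docstring;
    assumes a reachable host, and the CA bundle path is hypothetical)::

        >>> pool = HTTPSConnectionPool('httpbin.org', port=443,
        ...                            cert_reqs='CERT_REQUIRED',
        ...                            ca_certs='/etc/ssl/certs/ca-certificates.crt')
        >>> r = pool.request('GET', '/get')
        >>> r.status
        200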
    """

    scheme = 'https'
    ConnectionCls = HTTPSConnection

    def __init__(self, host, port=None,
                 strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1,
                 block=False, headers=None, retries=None,
                 _proxy=None, _proxy_headers=None,
                 key_file=None, cert_file=None, cert_reqs=None,
                 ca_certs=None, ssl_version=None,
                 assert_hostname=None, assert_fingerprint=None,
                 ca_cert_dir=None, **conn_kw):

        HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
                                    block, headers, retries, _proxy, _proxy_headers,
                                    **conn_kw)

        if ca_certs and cert_reqs is None:
            cert_reqs = 'CERT_REQUIRED'

        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.ca_certs = ca_certs
        self.ca_cert_dir = ca_cert_dir
        self.ssl_version = ssl_version
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint

    def _prepare_conn(self, conn):
        """
        Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
        and establish the tunnel if proxy is used.
        """

        if isinstance(conn, VerifiedHTTPSConnection):
            conn.set_cert(key_file=self.key_file,
                          cert_file=self.cert_file,
                          cert_reqs=self.cert_reqs,
                          ca_certs=self.ca_certs,
                          ca_cert_dir=self.ca_cert_dir,
                          assert_hostname=self.assert_hostname,
                          assert_fingerprint=self.assert_fingerprint)
            conn.ssl_version = self.ssl_version

        return conn

    def _prepare_proxy(self, conn):
        """
        Establish a tunnel connection early, because otherwise httplib would
        improperly set the Host: header to the proxy's IP:port.
        """
        # Python 2.7+
        try:
            set_tunnel = conn.set_tunnel
        except AttributeError:  # Platform-specific: Python 2.6
            set_tunnel = conn._set_tunnel

        if sys.version_info <= (2, 6, 4) and not self.proxy_headers:  # Python 2.6.4 and older
            set_tunnel(self.host, self.port)
        else:
            set_tunnel(self.host, self.port, self.proxy_headers)

        conn.connect()

    def _new_conn(self):
        """
        Return a fresh :class:`httplib.HTTPSConnection`.
        """
        self.num_connections += 1
        log.info("Starting new HTTPS connection (%d): %s",
                 self.num_connections, self.host)

        if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
            raise SSLError("Can't connect to HTTPS URL because the SSL "
                           "module is not available.")

        actual_host = self.host
        actual_port = self.port
        if self.proxy is not None:
            actual_host = self.proxy.host
            actual_port = self.proxy.port

        conn = self.ConnectionCls(host=actual_host, port=actual_port,
                                  timeout=self.timeout.connect_timeout,
                                  strict=self.strict, **self.conn_kw)

        return self._prepare_conn(conn)

    def _validate_conn(self, conn):
        """
        Called right before a request is made, after the socket is created.
        """
        super(HTTPSConnectionPool, self)._validate_conn(conn)

        # Force connect early to allow us to validate the connection.
        if not getattr(conn, 'sock', None):  # AppEngine might not have `.sock`
            conn.connect()

        if not conn.is_verified:
            warnings.warn((
                'Unverified HTTPS request is being made. '
                'Adding certificate verification is strongly advised. See: '
                'https://urllib3.readthedocs.org/en/latest/security.html'),
                InsecureRequestWarning)


def connection_from_url(url, **kw):
    """
    Given a url, return a :class:`.ConnectionPool` instance of its host.

    This is a shortcut for not having to parse out the scheme, host, and port
    of the url before creating a :class:`.ConnectionPool` instance.

    :param url:
        Absolute URL string that must include the scheme. Port is optional.

    :param \**kw:
        Passes additional parameters to the constructor of the appropriate
        :class:`.ConnectionPool`. Useful for specifying things like
        timeout, maxsize, headers, etc.

    Example::

        >>> conn = connection_from_url('http://google.com/')
        >>> r = conn.request('GET', '/')
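
    An ``https://`` URL returns an :class:`.HTTPSConnectionPool` instead
    (an illustrative addition, not part of the original docstring)::

        >>> conn = connection_from_url('https://example.com/', maxsize=5)
        >>> type(conn).__name__
        'HTTPSConnectionPool'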
    """
    scheme, host, port = get_host(url)
    port = port or port_by_scheme.get(scheme, 80)
    if scheme == 'https':
        return HTTPSConnectionPool(host, port=port, **kw)
    else:
        return HTTPConnectionPool(host, port=port, **kw)