1 # urllib3/connectionpool.py
2 # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
4 # This module is part of urllib3 and is released under
5 # the MIT License: http://www.opensource.org/licenses/mit-license.php
10 from socket import error as SocketError, timeout as SocketTimeout
14 from queue import LifoQueue, Empty, Full
16 from Queue import LifoQueue, Empty, Full
17 import Queue as _ # Platform-specific: Windows
20 from .exceptions import (
31 from .packages.ssl_match_hostname import CertificateError
32 from .packages import six
33 from .connection import (
36 HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
37 HTTPException, BaseSSLError,
39 from .request import RequestMethods
40 from .response import HTTPResponse
44 is_connection_dropped,
49 xrange = six.moves.xrange
51 log = logging.getLogger(__name__)
57 class ConnectionPool(object):
59 Base class for all connection pools, such as
60 :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.
66 def __init__(self, host, port=None):
67 # httplib doesn't like it when we include brackets in ipv6 addresses
68 host = host.strip('[]')
74 return '%s(host=%r, port=%r)' % (type(self).__name__,
77 # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
78 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
80 class HTTPConnectionPool(ConnectionPool, RequestMethods):
82 Thread-safe connection pool for one host.
85 Host used for this HTTP Connection (e.g. "localhost"), passed into
86 :class:`httplib.HTTPConnection`.
89 Port used for this HTTP Connection (None is equivalent to 80), passed
90 into :class:`httplib.HTTPConnection`.
93 Causes BadStatusLine to be raised if the status line can't be parsed
94 as a valid HTTP/1.0 or 1.1 status line, passed into
95 :class:`httplib.HTTPConnection`.
98 Only works in Python 2. This parameter is ignored in Python 3.
101 Socket timeout in seconds for each individual connection. This can
102 be a float or integer, which sets the timeout for the HTTP request,
103 or an instance of :class:`urllib3.util.Timeout` which gives you more
104 fine-grained control over request timeouts. After the constructor has
105 run, this is always a :class:`urllib3.util.Timeout` object.
108 Number of connections to save that can be reused. More than 1 is useful
109 in multithreaded situations. If ``block`` is set to false, more
110 connections will be created but they will not be saved once they've
114 If set to True, no more than ``maxsize`` connections will be used at
115 a time. When no free connections are available, the call will block
116 until a connection has been released. This is a useful side effect for
117 particular multithreaded situations where one does not want to use more
118 than maxsize connections per host to prevent flooding.
121 Headers to include with all requests, unless other headers are given
125 Parsed proxy URL. Should not be used directly; instead, see
126 :class:`urllib3.poolmanager.ProxyManager`.
128 :param _proxy_headers:
129 A dictionary with proxy headers. Should not be used directly;
130 instead, see :class:`urllib3.poolmanager.ProxyManager`.
134 ConnectionCls = HTTPConnection
136 def __init__(self, host, port=None, strict=False,
137 timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
138 headers=None, _proxy=None, _proxy_headers=None):
139 ConnectionPool.__init__(self, host, port)
140 RequestMethods.__init__(self, headers)
144 # This is for backwards compatibility and can be removed once a timeout
145 # can only be set to a Timeout object
146 if not isinstance(timeout, Timeout):
147 timeout = Timeout.from_float(timeout)
149 self.timeout = timeout
151 self.pool = self.QueueCls(maxsize)
155 self.proxy_headers = _proxy_headers or {}
157 # Fill the queue up so that doing get() on it will block properly
158 for _ in xrange(maxsize):
161 # These are mostly for testing and debugging purposes.
162 self.num_connections = 0
163 self.num_requests = 0
167 Return a fresh :class:`HTTPConnection`.
169 self.num_connections += 1
170 log.info("Starting new HTTP connection (%d): %s" %
171 (self.num_connections, self.host))
174 if not six.PY3: # Python 2
175 extra_params['strict'] = self.strict
177 conn = self.ConnectionCls(host=self.host, port=self.port,
178 timeout=self.timeout.connect_timeout,
180 if self.proxy is not None:
181 # Enable Nagle's algorithm for proxies, to avoid packet
186 def _get_conn(self, timeout=None):
188 Get a connection. Will return a pooled connection if one is available.
190 If no connections are available and :attr:`.block` is ``False``, then a
191 fresh connection is returned.
194 Seconds to wait before giving up and raising
195 :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
196 :attr:`.block` is ``True``.
200 conn = self.pool.get(block=self.block, timeout=timeout)
202 except AttributeError: # self.pool is None
203 raise ClosedPoolError(self, "Pool is closed.")
207 raise EmptyPoolError(self,
208 "Pool reached maximum size and no more "
209 "connections are allowed.")
210 pass # Oh well, we'll create a new connection then
212 # If this is a persistent connection, check if it got disconnected
213 if conn and is_connection_dropped(conn):
214 log.info("Resetting dropped connection: %s" % self.host)
217 return conn or self._new_conn()
219 def _put_conn(self, conn):
221 Put a connection back into the pool.
224 Connection object for the current host and port as returned by
225 :meth:`._new_conn` or :meth:`._get_conn`.
227 If the pool is already full, the connection is closed and discarded
228 because we exceeded maxsize. If connections are discarded frequently,
229 then maxsize should be increased.
231 If the pool is closed, then the connection will be closed and discarded.
234 self.pool.put(conn, block=False)
235 return # Everything is dandy, done.
236 except AttributeError:
240 # This should never happen if self.block == True
241 log.warning("HttpConnectionPool is full, discarding connection: %s"
244 # Connection never got put back into the pool, close it.
248 def _get_timeout(self, timeout):
249 """ Helper that always returns a :class:`urllib3.util.Timeout` """
250 if timeout is _Default:
251 return self.timeout.clone()
253 if isinstance(timeout, Timeout):
254 return timeout.clone()
256 # User passed us an int/float. This is for backwards compatibility,
257 # can be removed later
258 return Timeout.from_float(timeout)
260 def _make_request(self, conn, method, url, timeout=_Default,
261 **httplib_request_kw):
263 Perform a request on a given urllib connection object taken from our
267 a connection from one of our connection pools
270 Socket timeout in seconds for the request. This can be a
271 float or integer, which will set the same timeout value for
272 the socket connect and the socket read, or an instance of
273 :class:`urllib3.util.Timeout`, which gives you more fine-grained
274 control over your timeouts.
276 self.num_requests += 1
278 timeout_obj = self._get_timeout(timeout)
281 timeout_obj.start_connect()
282 conn.timeout = timeout_obj.connect_timeout
283 # conn.request() calls httplib.*.request, not the method in
284 # urllib3.request. It also calls makefile (recv) on the socket.
285 conn.request(method, url, **httplib_request_kw)
286 except SocketTimeout:
287 raise ConnectTimeoutError(
288 self, "Connection to %s timed out. (connect timeout=%s)" %
289 (self.host, timeout_obj.connect_timeout))
291 # Reset the timeout for the recv() on the socket
292 read_timeout = timeout_obj.read_timeout
294 # App Engine doesn't have a sock attr
295 if hasattr(conn, 'sock'):
296 # In Python 3 socket.py will catch EAGAIN and return None when you
297 # try and read into the file pointer created by http.client, which
298 # then raises a BadStatusLine exception. Rather than catching
299 # the exception and assuming all BadStatusLine exceptions are read
300 # timeouts, check for a zero timeout before making the request.
301 if read_timeout == 0:
302 raise ReadTimeoutError(
304 "Read timed out. (read timeout=%s)" % read_timeout)
305 if read_timeout is Timeout.DEFAULT_TIMEOUT:
306 conn.sock.settimeout(socket.getdefaulttimeout())
307 else: # None or a value
308 conn.sock.settimeout(read_timeout)
310 # Receive the response from the server
312 try: # Python 2.7+, use buffering of HTTP responses
313 httplib_response = conn.getresponse(buffering=True)
314 except TypeError: # Python 2.6 and older
315 httplib_response = conn.getresponse()
316 except SocketTimeout:
317 raise ReadTimeoutError(
318 self, url, "Read timed out. (read timeout=%s)" % read_timeout)
320 except BaseSSLError as e:
321 # Catch possible read timeouts thrown as SSL errors. If not the
322 # case, rethrow the original. We need to do this because of:
323 # http://bugs.python.org/issue10272
324 if 'timed out' in str(e) or \
325 'did not complete (read)' in str(e): # Python 2.6
326 raise ReadTimeoutError(self, url, "Read timed out.")
330 except SocketError as e: # Platform-specific: Python 2
331 # See the above comment about EAGAIN in Python 3. In Python 2 we
332 # have to specifically catch it and throw the timeout error
333 if e.errno in _blocking_errnos:
334 raise ReadTimeoutError(
336 "Read timed out. (read timeout=%s)" % read_timeout)
340 # AppEngine doesn't have a version attr.
341 http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
342 log.debug("\"%s %s %s\" %s %s" % (method, url, http_version,
343 httplib_response.status,
344 httplib_response.length))
345 return httplib_response
349 Close all pooled connections and disable the pool.
351 # Disable access to the pool
352 old_pool, self.pool = self.pool, None
356 conn = old_pool.get(block=False)
363 def is_same_host(self, url):
365 Check if the given ``url`` is a member of the same host as this
368 if url.startswith('/'):
371 # TODO: Add optional support for socket.gethostbyname checking.
372 scheme, host, port = get_host(url)
374 # Use explicit default port for comparison when none is given
375 if self.port and not port:
376 port = port_by_scheme.get(scheme)
377 elif not self.port and port == port_by_scheme.get(scheme):
380 return (scheme, host, port) == (self.scheme, self.host, self.port)
382 def urlopen(self, method, url, body=None, headers=None, retries=3,
383 redirect=True, assert_same_host=True, timeout=_Default,
384 pool_timeout=None, release_conn=None, **response_kw):
386 Get a connection from the pool and perform an HTTP request. This is the
387 lowest level call for making a request, so you'll need to specify all
392 More commonly, it's appropriate to use a convenience method provided
393 by :class:`.RequestMethods`, such as :meth:`request`.
397 `release_conn` will only behave as expected if
398 `preload_content=False` because we want to make
399 `preload_content=False` the default behaviour someday soon without
400 breaking backwards compatibility.
403 HTTP request method (such as GET, POST, PUT, etc.)
406 Data to send in the request body (useful for creating
407 POST requests, see HTTPConnectionPool.post_url for
411 Dictionary of custom headers to send, such as User-Agent,
412 If-None-Match, etc. If None, pool headers are used. If provided,
413 these headers completely replace any pool-specific headers.
416 Number of retries to allow before raising a MaxRetryError exception.
419 If True, automatically handle redirects (status codes 301, 302,
420 303, 307, 308). Each redirect counts as a retry.
422 :param assert_same_host:
423 If ``True``, will make sure that the host of the pool requests is
424 consistent, otherwise will raise HostChangedError. When ``False``, you
425 can use the pool on an HTTP proxy and request foreign hosts.
428 If specified, overrides the default timeout for this one
429 request. It may be a float (in seconds) or an instance of
430 :class:`urllib3.util.Timeout`.
433 If set and the pool is set to block=True, then this method will
434 block for ``pool_timeout`` seconds and raise EmptyPoolError if no
435 connection is available within the time period.
438 If False, then the urlopen call will not release the connection
439 back into the pool once a response is received (but will release if
440 you read the entire contents of the response such as when
441 `preload_content=True`). This is useful if you're not preloading
442 the response's content immediately. You will need to call
443 ``r.release_conn()`` on the response ``r`` to return the connection
444 back into the pool. If None, it takes the value of
445 ``response_kw.get('preload_content', True)``.
447 :param \**response_kw:
448 Additional parameters are passed to
449 :meth:`urllib3.response.HTTPResponse.from_httplib`
452 headers = self.headers
455 raise MaxRetryError(self, url)
457 if release_conn is None:
458 release_conn = response_kw.get('preload_content', True)
461 if assert_same_host and not self.is_same_host(url):
462 raise HostChangedError(self, url, retries - 1)
466 # Merge the proxy headers. Only do this in HTTP. We have to copy the
467 # headers dict so we can safely change it without those changes being
468 # reflected in anyone else's copy.
469 if self.scheme == 'http':
470 headers = headers.copy()
471 headers.update(self.proxy_headers)
474 # Request a connection from the queue
475 conn = self._get_conn(timeout=pool_timeout)
477 # Make the request on the httplib connection object
478 httplib_response = self._make_request(conn, method, url,
480 body=body, headers=headers)
482 # If we're going to release the connection in ``finally:``, then
483 # the request doesn't need to know about the connection. Otherwise
484 # it will also try to release it and we'll have a double-release
486 response_conn = not release_conn and conn
488 # Import httplib's response into our own wrapper object
489 response = HTTPResponse.from_httplib(httplib_response,
491 connection=response_conn,
495 # The connection will be put back into the pool when
496 # ``response.release_conn()`` is called (implicitly by
497 # ``response.read()``)
501 raise EmptyPoolError(self, "No pool connections are available.")
503 except BaseSSLError as e:
506 except CertificateError as e:
510 except TimeoutError as e:
511 # Connection broken, discard.
513 # Save the error off for retry logic.
519 except (HTTPException, SocketError) as e:
520 # Connection broken, discard. It will be replaced next _get_conn().
522 # This is necessary so we can access e below
526 if isinstance(e, SocketError) and self.proxy is not None:
527 raise ProxyError('Cannot connect to proxy. '
528 'Socket error: %s.' % e)
530 raise MaxRetryError(self, url, e)
534 # Put the connection back to be reused. If the connection is
535 # expired then it will be None, which will get replaced with a
536 # fresh connection during _get_conn.
541 log.warn("Retrying (%d attempts remain) after connection "
542 "broken by '%r': %s" % (retries, err, url))
543 return self.urlopen(method, url, body, headers, retries - 1,
544 redirect, assert_same_host,
545 timeout=timeout, pool_timeout=pool_timeout,
546 release_conn=release_conn, **response_kw)
549 redirect_location = redirect and response.get_redirect_location()
550 if redirect_location:
551 if response.status == 303:
553 log.info("Redirecting %s -> %s" % (url, redirect_location))
554 return self.urlopen(method, redirect_location, body, headers,
555 retries - 1, redirect, assert_same_host,
556 timeout=timeout, pool_timeout=pool_timeout,
557 release_conn=release_conn, **response_kw)
562 class HTTPSConnectionPool(HTTPConnectionPool):
564 Same as :class:`.HTTPConnectionPool`, but HTTPS.
566 When Python is compiled with the :mod:`ssl` module, then
567 :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
568 instead of :class:`.HTTPSConnection`.
570 :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
571 ``assert_hostname`` and ``host`` in this order to verify connections.
572 If ``assert_hostname`` is False, no verification is done.
574 The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and
575 ``ssl_version`` are only used if :mod:`ssl` is available and are fed into
576 :meth:`urllib3.util.ssl_wrap_socket` to upgrade the connection socket
581 ConnectionCls = HTTPSConnection
583 def __init__(self, host, port=None,
584 strict=False, timeout=None, maxsize=1,
585 block=False, headers=None,
586 _proxy=None, _proxy_headers=None,
587 key_file=None, cert_file=None, cert_reqs=None,
588 ca_certs=None, ssl_version=None,
589 assert_hostname=None, assert_fingerprint=None):
591 HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
592 block, headers, _proxy, _proxy_headers)
593 self.key_file = key_file
594 self.cert_file = cert_file
595 self.cert_reqs = cert_reqs
596 self.ca_certs = ca_certs
597 self.ssl_version = ssl_version
598 self.assert_hostname = assert_hostname
599 self.assert_fingerprint = assert_fingerprint
601 def _prepare_conn(self, conn):
603 Prepare the ``connection`` for :func:`urllib3.util.ssl_wrap_socket`
604 and establish the tunnel if proxy is used.
607 if isinstance(conn, VerifiedHTTPSConnection):
608 conn.set_cert(key_file=self.key_file,
609 cert_file=self.cert_file,
610 cert_reqs=self.cert_reqs,
611 ca_certs=self.ca_certs,
612 assert_hostname=self.assert_hostname,
613 assert_fingerprint=self.assert_fingerprint)
614 conn.ssl_version = self.ssl_version
616 if self.proxy is not None:
619 set_tunnel = conn.set_tunnel
620 except AttributeError: # Platform-specific: Python 2.6
621 set_tunnel = conn._set_tunnel
622 set_tunnel(self.host, self.port, self.proxy_headers)
623 # Establish tunnel connection early, because otherwise httplib
624 # would improperly set Host: header to proxy's IP:port.
631 Return a fresh :class:`httplib.HTTPSConnection`.
633 self.num_connections += 1
634 log.info("Starting new HTTPS connection (%d): %s"
635 % (self.num_connections, self.host))
637 if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
638 # Platform-specific: Python without ssl
639 raise SSLError("Can't connect to HTTPS URL because the SSL "
640 "module is not available.")
642 actual_host = self.host
643 actual_port = self.port
644 if self.proxy is not None:
645 actual_host = self.proxy.host
646 actual_port = self.proxy.port
649 if not six.PY3: # Python 2
650 extra_params['strict'] = self.strict
652 conn = self.ConnectionCls(host=actual_host, port=actual_port,
653 timeout=self.timeout.connect_timeout,
655 if self.proxy is not None:
656 # Enable Nagle's algorithm for proxies, to avoid packet
660 return self._prepare_conn(conn)
663 def connection_from_url(url, **kw):
665 Given a url, return a :class:`.ConnectionPool` instance of its host.
667 This is a shortcut for not having to parse out the scheme, host, and port
668 of the url before creating a :class:`.ConnectionPool` instance.
671 Absolute URL string that must include the scheme. Port is optional.
674 Passes additional parameters to the constructor of the appropriate
675 :class:`.ConnectionPool`. Useful for specifying things like
676 timeout, maxsize, headers, etc.
680 >>> conn = connection_from_url('http://google.com/')
681 >>> r = conn.request('GET', '/')
683 scheme, host, port = get_host(url)
684 if scheme == 'https':
685 return HTTPSConnectionPool(host, port=port, **kw)
687 return HTTPConnectionPool(host, port=port, **kw)