# urllib3/poolmanager.py
# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import logging

try:  # Python 3
    from urllib.parse import urljoin
except ImportError:  # Python 2
    from urlparse import urljoin

from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
from .request import RequestMethods
from .util import parse_url
# Public names of this module.
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']


# Maps a URL scheme to the connection-pool class instantiated for it
# (looked up by ``PoolManager._new_pool``).
pool_classes_by_scheme = {
    'http': HTTPConnectionPool,
    'https': HTTPSConnectionPool,
}

log = logging.getLogger(__name__)

# Connection-pool keyword arguments that ``PoolManager._new_pool`` iterates
# over when preparing the kwargs for a new pool.
# NOTE(review): the last entry was cut off in the listing this was recovered
# from; 'ssl_version' is restored from upstream urllib3 -- confirm.
SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
                'ssl_version')
class PoolManager(RequestMethods):
    r"""
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example: ::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # ``urlopen`` reads ``self.proxy``; a plain manager never assigns it, so
    # the class-level default marks "no proxy configured".
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        # Kept as-is and reused as the keyword arguments of every new pool.
        self.connection_pool_kw = connection_pool_kw
        # Pools are keyed by (scheme, host, port); the dispose callback
        # closes a pool when the container hands it over for disposal.
        self.pools = RecentlyUsedContainer(num_pools,
                                           dispose_func=lambda p: p.close())

    def _new_pool(self, scheme, host, port):
        """
        Create a new :class:`ConnectionPool` based on host, port and scheme.

        This method is used to actually create the connection pools handed out
        by :meth:`connection_from_url` and companion methods. It is intended
        to be overridden for customization.
        """
        pool_cls = pool_classes_by_scheme[scheme]
        kwargs = self.connection_pool_kw
        # NOTE(review): the guard line was missing from the recovered
        # listing; ``scheme == 'http'`` is restored from upstream -- confirm.
        if scheme == 'http':
            # Work on a copy so the manager's shared kwargs stay untouched,
            # then drop the SSL-only keywords for the plain-HTTP pool.
            kwargs = self.connection_pool_kw.copy()
            for kw in SSL_KEYWORDS:
                kwargs.pop(kw, None)

        return pool_cls(host, port, **kwargs)

    # NOTE(review): the ``def`` line and body of this method were missing
    # from the recovered listing; name and body restored from upstream.
    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``.
        """
        scheme = scheme or 'http'

        # Unknown schemes fall back to port 80.
        port = port or port_by_scheme.get(scheme, 80)

        pool_key = (scheme, host, port)

        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            pool = self._new_pool(scheme, host, port)
            self.pools[pool_key] = pool
        return pool

    def connection_from_url(self, url):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url` but
        doesn't pass any additional parameters to the
        :class:`urllib3.connectionpool.ConnectionPool` constructor.

        Additional parameters are taken from the :class:`.PoolManager`
        constructor.
        """
        u = parse_url(url)
        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed here rather than inside the pool, so the
        # pool is told not to redirect and not to enforce same-host.
        kw['assert_same_host'] = False
        kw['redirect'] = False
        if 'headers' not in kw:
            kw['headers'] = self.headers

        if self.proxy is not None and u.scheme == "http":
            # Plain HTTP through a proxy: send the full absolute URL.
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 2616, Section 10.3.4
        if response.status == 303:
            method = 'GET'

        # Lazy %-args: the message is only formatted if the record is emitted.
        log.info("Redirecting %s -> %s", url, redirect_location)
        kw['retries'] = kw.get('retries', 3) - 1  # Persist retries countdown
        kw['redirect'] = redirect
        return self.urlopen(method, redirect_location, **kw)
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In
        case of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(self, proxy_url, num_pools=10, headers=None,
                 proxy_headers=None, **connection_pool_kw):
        """
        Parse ``proxy_url`` and set up the underlying :class:`PoolManager`.

        ``proxy_url`` may also be an :class:`HTTPConnectionPool`, in which
        case the proxy URL is rebuilt from its scheme, host and port. The
        parsed proxy and the proxy headers are handed to every pool through
        the ``_proxy`` and ``_proxy_headers`` keyword arguments.
        """
        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
                                        proxy_url.port)
        proxy = parse_url(proxy_url)
        if not proxy.port:
            # No explicit port: use the scheme's default (80 if unknown).
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)
        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}
        # NOTE(review): ``assert`` is stripped under ``python -O``; kept so
        # the exception type (AssertionError) seen by callers is unchanged.
        assert self.proxy.scheme in ("http", "https"), \
            'Not supported proxy scheme %s' % self.proxy.scheme
        connection_pool_kw['_proxy'] = self.proxy
        connection_pool_kw['_proxy_headers'] = self.proxy_headers
        super(ProxyManager, self).__init__(
            num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get the pool to use for a request to ``host``.

        For ``https`` the pool is keyed on the target host itself; for any
        other scheme the pool for the proxy's own host, port and scheme is
        returned, so all such requests share it.
        """
        if scheme == "https":
            return super(ProxyManager, self).connection_from_host(
                host, port, scheme)

        return super(ProxyManager, self).connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme)

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.
        """
        headers_ = {'Accept': '*/*'}

        netloc = parse_url(url).netloc
        if netloc:
            headers_['Host'] = netloc

        # User-supplied headers are applied last so they win over defaults.
        if headers:
            headers_.update(headers)
        return headers_

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        u = parse_url(url)

        if u.scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
                                                                self.headers))

        return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    :param url:
        Passed to :class:`ProxyManager` as its ``proxy_url`` argument.

    :param kw:
        Any further keyword arguments, forwarded unchanged to the
        :class:`ProxyManager` constructor.

    :returns:
        The new :class:`ProxyManager` instance.
    """
    return ProxyManager(proxy_url=url, **kw)