from __future__ import absolute_import
import logging

try:  # Python 3
    from urllib.parse import urljoin
except ImportError:  # Python 2
    from urlparse import urljoin

from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
from .request import RequestMethods
from .util.url import parse_url
from .util.retry import Retry

__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']


log = logging.getLogger(__name__)

# Names of the SSL-related keyword arguments. ``PoolManager._new_pool``
# iterates over these when preparing the kwargs for a new pool.
SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
                'ssl_version', 'ca_cert_dir')

# Maps a URL scheme to the connection pool class used to serve it.
pool_classes_by_scheme = {
    'http': HTTPConnectionPool,
    'https': HTTPSConnectionPool,
}
class PoolManager(RequestMethods):
    r"""
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # ``urlopen`` reads ``self.proxy``; a plain PoolManager has none.
    # Subclasses that route through a proxy assign it per instance.
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        """
        Store the default headers and pool keyword arguments and create the
        container that caches up to ``num_pools`` pools, closing each pool
        as it is discarded.
        """
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        self.pools = RecentlyUsedContainer(num_pools,
                                           dispose_func=lambda p: p.close())

        # Locally set the pool classes so other PoolManagers can override them.
        self.pool_classes_by_scheme = pool_classes_by_scheme

    def __enter__(self):
        """Enter the context manager; returns the manager itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Clear all cached pools when leaving the ``with`` block."""
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port):
        """
        Create a new :class:`ConnectionPool` based on host, port and scheme.

        This method is used to actually create the connection pools handed out
        by :meth:`connection_from_url` and companion methods. It is intended
        to be overridden for customization.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        kwargs = self.connection_pool_kw
        if scheme == 'http':
            # Drop the SSL-only keywords for plain HTTP, working on a copy
            # so that ``self.connection_pool_kw`` itself is not mutated.
            kwargs = self.connection_pool_kw.copy()
            for kw in SSL_KEYWORDS:
                kwargs.pop(kw, None)

        return pool_cls(host, port, **kwargs)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """
        if not host:
            raise LocationValueError("No host specified.")

        scheme = scheme or 'http'
        port = port or port_by_scheme.get(scheme, 80)
        pool_key = (scheme, host, port)

        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            pool = self._new_pool(scheme, host, port)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url` but
        doesn't pass any additional parameters to the
        :class:`urllib3.connectionpool.ConnectionPool` constructor.

        Additional parameters are taken from the :class:`.PoolManager`
        constructor.
        """
        u = parse_url(url)
        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed here rather than inside the pool, because
        # the target may live on a different host (and therefore pool).
        kw['assert_same_host'] = False
        kw['redirect'] = False
        if 'headers' not in kw:
            kw['headers'] = self.headers

        if self.proxy is not None and u.scheme == "http":
            # Through a proxy, plain-HTTP requests are sent with the full URL.
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = 'GET'

        retries = kw.get('retries')
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                raise
            # Redirect budget exhausted, but the caller asked not to raise:
            # hand back the redirect response itself.
            return response

        kw['retries'] = retries
        kw['redirect'] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)
        return self.urlopen(method, redirect_location, **kw)
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(self, proxy_url, num_pools=10, headers=None,
                 proxy_headers=None, **connection_pool_kw):
        """
        Parse and validate ``proxy_url``, then initialise the underlying
        :class:`PoolManager` with the proxy passed to every pool.

        ``proxy_url`` may also be an :class:`HTTPConnectionPool`, in which
        case a URL is built from its scheme, host and port.

        :raises ProxySchemeUnknown: if the proxy scheme is neither
            ``http`` nor ``https``.
        """
        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
                                        proxy_url.port)
        proxy = parse_url(proxy_url)
        if not proxy.port:
            # Fill in the scheme's default port only when none was given.
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}

        connection_pool_kw['_proxy'] = self.proxy
        connection_pool_kw['_proxy_headers'] = self.proxy_headers

        super(ProxyManager, self).__init__(
            num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Return the pool for an ``https`` target keyed on the target's own
        host, port and scheme; for any other scheme return the pool keyed
        on the proxy's host, port and scheme.
        """
        if scheme == "https":
            return super(ProxyManager, self).connection_from_host(
                host, port, scheme)

        return super(ProxyManager, self).connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme)

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.
        """
        headers_ = {'Accept': '*/*'}

        netloc = parse_url(url).netloc
        if netloc:
            headers_['Host'] = netloc

        if headers:
            # Applied last so user-supplied values win over the defaults.
            headers_.update(headers)
        return headers_

    def urlopen(self, method, url, redirect=True, **kw):
        """Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."""
        u = parse_url(url)

        if u.scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            headers = kw.get('headers', self.headers)
            kw['headers'] = self._set_proxy_headers(url, headers)

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
def proxy_from_url(url, **kw):
    """
    Build a :class:`ProxyManager` for the proxy at ``url``.

    Any extra keyword arguments are forwarded unchanged to the
    :class:`ProxyManager` constructor.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager