# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from base64 import b64encode
from binascii import hexlify, unhexlify
from collections import namedtuple
from hashlib import md5, sha1
from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT
import time

try:
    from select import poll, POLLIN
except ImportError:  # `poll` doesn't exist on OSX and other platforms
    poll = False
    try:
        from select import select
    except ImportError:  # `select` doesn't exist on AppEngine.
        select = False

try:  # Test for SSL features
    SSLContext = None
    HAS_SNI = False

    import ssl
    from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
    from ssl import SSLContext  # Modern SSL?
    from ssl import HAS_SNI  # Has SNI?
except ImportError:
    pass

from .packages import six
from .exceptions import LocationParseError, SSLError, TimeoutStateError
# Private sentinel meaning "argument omitted". ``None`` cannot be used for
# that purpose because ``None`` is a valid timeout value (wait forever).
_Default = object()
# The default timeout to use for socket connections. This is the attribute used
# by httplib to define the default timeout


def current_time():
    """
    Retrieve the current time, this function is mocked out in unit testing.
    """
    return time.time()


class Timeout(object):
    """
    Utility object for storing timeout values.

    Example usage:

    .. code-block:: python

        timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
        pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
        pool.request(...) # Etc, etc

    :param connect:
        The maximum amount of time to wait for a connection attempt to a server
        to succeed. Omitting the parameter will default the connect timeout to
        the system default, probably `the global default timeout in socket.py
        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
        None will set an infinite timeout for connection attempts.

    :type connect: integer, float, or None

    :param read:
        The maximum amount of time to wait between consecutive
        read operations for a response from the server. Omitting
        the parameter will default the read timeout to the system
        default, probably `the global default timeout in socket.py
        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
        None will set an infinite timeout.

    :type read: integer, float, or None

    :param total:
        This combines the connect and read timeouts into one; the read timeout
        will be set to the time leftover from the connect attempt. In the
        event that both a connect timeout and a total are specified, or a read
        timeout and a total are specified, the shorter timeout will be applied.

        Defaults to None.

    :type total: integer, float, or None

    .. note::

        Many factors can affect the total amount of time for urllib3 to return
        an HTTP response. Specifically, Python's DNS resolver does not obey the
        timeout specified on the socket. Other factors that can affect total
        request time include high CPU load, high swap, the program running at a
        low priority level, or other behaviors. The observed running time for
        urllib3 to return a response may be greater than the value passed to
        `total`.

        In addition, the read and total timeouts only measure the time between
        read operations on the socket connecting the client and the server,
        not the total amount of time for the request to return a complete
        response. For most requests, the timeout is raised because the server
        has not sent the first byte in the specified time. This is not always
        the case; if a server streams one byte every fifteen seconds, a timeout
        of 20 seconds will not ever trigger, even though the request will
        take several minutes to complete.

        If your goal is to cut off any request after a set amount of wall clock
        time, consider having a second "watcher" thread to cut off a slow
        request.
    """

    #: A sentinel object representing the default timeout value
    DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT

    def __init__(self, total=None, connect=_Default, read=_Default):
        self._connect = self._validate_timeout(connect, 'connect')
        self._read = self._validate_timeout(read, 'read')
        self.total = self._validate_timeout(total, 'total')
        # Set by start_connect(); None means the clock has not been started.
        self._start_connect = None

    def __str__(self):
        return '%s(connect=%r, read=%r, total=%r)' % (
            type(self).__name__, self._connect, self._read, self.total)

    @classmethod
    def _validate_timeout(cls, value, name):
        """ Check that a timeout attribute is valid

        :param value: The timeout value to validate
        :param name: The name of the timeout attribute to validate. This is used
            for clear error messages
        :return: the value
        :raises ValueError: if the type is not an integer or a float, or if it
            is a numeric value less than zero
        """
        if value is _Default:
            return cls.DEFAULT_TIMEOUT

        # None (no timeout) and the socket sentinel are passed through as-is.
        if value is None or value is cls.DEFAULT_TIMEOUT:
            return value

        try:
            float(value)
        except (TypeError, ValueError):
            raise ValueError("Timeout value %s was %s, but it must be an "
                             "int or float." % (name, value))

        try:
            if value < 0:
                raise ValueError("Attempted to set %s timeout to %s, but the "
                                 "timeout cannot be set to a value less "
                                 "than 0." % (name, value))
        except TypeError:  # Python 3
            raise ValueError("Timeout value %s was %s, but it must be an "
                             "int or float." % (name, value))

        return value

    @classmethod
    def from_float(cls, timeout):
        """ Create a new Timeout from a legacy timeout value.

        The timeout value used by httplib.py sets the same timeout on the
        connect(), and recv() socket requests. This creates a :class:`Timeout`
        object that sets the individual timeouts to the ``timeout`` value passed
        to this function.

        :param timeout: The legacy timeout value
        :type timeout: integer, float, sentinel default object, or None
        :return: a Timeout object
        :rtype: :class:`Timeout`
        """
        return Timeout(read=timeout, connect=timeout)

    def clone(self):
        """ Create a copy of the timeout object

        Timeout properties are stored per-pool but each request needs a fresh
        Timeout object to ensure each one has its own start/stop configured.

        :return: a copy of the timeout object
        :rtype: :class:`Timeout`
        """
        # We can't use copy.deepcopy because that will also create a new object
        # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
        # detect the user default.
        return Timeout(connect=self._connect, read=self._read,
                       total=self.total)

    def start_connect(self):
        """ Start the timeout clock, used during a connect() attempt

        :return: the time at which the clock was started
        :raises urllib3.exceptions.TimeoutStateError: if you attempt
            to start a timer that has been started already.
        """
        if self._start_connect is not None:
            raise TimeoutStateError("Timeout timer has already been started.")
        self._start_connect = current_time()
        return self._start_connect

    def get_connect_duration(self):
        """ Gets the time elapsed since the call to :meth:`start_connect`.

        :return: the elapsed time
        :rtype: float
        :raises urllib3.exceptions.TimeoutStateError: if you attempt
            to get duration for a timer that hasn't been started.
        """
        if self._start_connect is None:
            raise TimeoutStateError("Can't get connect duration for timer "
                                    "that has not started.")
        return current_time() - self._start_connect

    @property
    def connect_timeout(self):
        """ Get the value to use when setting a connection timeout.

        This will be a positive float or integer, the value None
        (never timeout), or the default system timeout.

        :return: the connect timeout
        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
        """
        if self.total is None:
            return self._connect

        # No usable connect value: the total is the only bound available.
        if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
            return self.total

        return min(self._connect, self.total)

    @property
    def read_timeout(self):
        """ Get the value for the read timeout.

        This assumes some time has elapsed in the connection timeout and
        computes the read timeout appropriately.

        If self.total is set, the read timeout is dependent on the amount of
        time taken by the connect timeout. If the connection time has not been
        established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
        raised.

        :return: the value to use for the read timeout
        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
        :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
            has not yet been called on this object.
        """
        if (self.total is not None and
                self.total is not self.DEFAULT_TIMEOUT and
                self._read is not None and
                self._read is not self.DEFAULT_TIMEOUT):
            # in case the connect timeout has not yet been established.
            if self._start_connect is None:
                return self._read
            return max(0, min(self.total - self.get_connect_duration(),
                              self._read))
        elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
            return max(0, self.total - self.get_connect_duration())
        else:
            return self._read
class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.

    Every field defaults to ``None`` so that partially specified URLs can be
    represented.
    """
    # Keep instances as light as the underlying tuple (no per-instance dict).
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None):
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        uri = self.path or '/'

        if self.query is not None:
            uri += '?' + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port"""
        if self.port:
            return '%s:%d' % (self.host, self.port)
        return self.host
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Example: ::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            # This delimiter does not occur in the string.
            continue

        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None or min_idx < 0:
        return s, '', None

    return s[:min_idx], s[min_idx+1:], min_delim
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    Example: ::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)

    :raises LocationParseError: if a port is given but is not all digits.
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementations does silly things to be optimal
    # on CPython.

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path
        path = delim + path_

    # Auth
    if '@' in url:
        # Last '@' denotes end of auth part
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        _host, port = url.split(':', 1)

        if not host:
            host = _host

        if port:
            # If given, ports must be integers.
            if not port.isdigit():
                raise LocationParseError("Failed to parse: %s" % url)
            port = int(port)
        else:
            # Blank ports are cool, too. (rfc3986#section-3.2.3)
            port = None

    elif not host and url:
        host = url

    if not path:
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)
def get_host(url):
    """
    Deprecated. Use :func:`.parse_url` instead.

    :return: a ``(scheme, hostname, port)`` tuple; the scheme falls back to
        ``'http'`` when the url does not carry one.
    """
    p = parse_url(url)
    return p.scheme or 'http', p.hostname, p.port
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None, proxy_basic_auth=None):
    """
    Shortcuts for generating request headers.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, or string.
        ``True`` translates to 'gzip,deflate'.
        List will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :param proxy_basic_auth:
        Colon-separated username:password string for 'proxy-authorization: basic ...'
        auth header.

    :return: a dict mapping lower-case header names to their values.

    Example: ::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    headers = {}
    if accept_encoding:
        if isinstance(accept_encoding, str):
            # Strings are used exactly as provided.
            pass
        elif isinstance(accept_encoding, list):
            accept_encoding = ','.join(accept_encoding)
        else:
            accept_encoding = 'gzip,deflate'
        headers['accept-encoding'] = accept_encoding

    if user_agent:
        headers['user-agent'] = user_agent

    if keep_alive:
        headers['connection'] = 'keep-alive'

    if basic_auth:
        headers['authorization'] = 'Basic ' + \
            b64encode(six.b(basic_auth)).decode('utf-8')

    if proxy_basic_auth:
        headers['proxy-authorization'] = 'Basic ' + \
            b64encode(six.b(proxy_basic_auth)).decode('utf-8')

    return headers
def is_connection_dropped(conn):  # Platform-specific
    """
    Returns True if the connection is dropped and should be closed.

    :param conn:
        :class:`httplib.HTTPConnection` object.

    Note: For platforms like AppEngine, this will always return ``False`` to
    let the platform handle connection recycling transparently for us.
    """
    sock = getattr(conn, 'sock', False)
    if not sock:  # Platform-specific: AppEngine
        return False

    if not poll:
        if not select:  # Platform-specific: AppEngine
            return False

        try:
            # Zero timeout: only report what is readable right now.
            return select([sock], [], [], 0.0)[0]
        except SocketError:
            return True

    # This version is better on platforms that support it.
    p = poll()
    p.register(sock, POLLIN)
    for (fno, ev) in p.poll(0.0):
        if fno == sock.fileno():
            # Either data is buffered (bad), or the connection is dropped.
            return True

    # Nothing pending on the socket: the connection is still usable.
    return False
def resolve_cert_reqs(candidate):
    """
    Resolves the argument to a numeric constant, which can be passed to
    the wrap_socket function/method from the ssl module.
    Defaults to :data:`ssl.CERT_NONE`.
    If given a string it is assumed to be the name of the constant in the
    :mod:`ssl` module or its abbreviation.
    (So you can specify `REQUIRED` instead of `CERT_REQUIRED`.)
    If it's neither `None` nor a string we assume it is already the numeric
    constant which can directly be passed to wrap_socket.

    :raises AttributeError: if a string names no constant in :mod:`ssl`,
        with or without the ``CERT_`` prefix.
    """
    if candidate is None:
        return CERT_NONE

    if isinstance(candidate, str):
        # Try the full constant name first, then the abbreviated form.
        res = getattr(ssl, candidate, None)
        if res is None:
            res = getattr(ssl, 'CERT_' + candidate)
        return res

    return candidate
def resolve_ssl_version(candidate):
    """
    like resolve_cert_reqs

    ``None`` resolves to :data:`ssl.PROTOCOL_SSLv23`; a string is looked up in
    the :mod:`ssl` module, first as given and then with a ``PROTOCOL_``
    prefix; any other value is returned unchanged.
    """
    if candidate is None:
        return PROTOCOL_SSLv23

    if isinstance(candidate, str):
        res = getattr(ssl, candidate, None)
        if res is None:
            res = getattr(ssl, 'PROTOCOL_' + candidate)
        return res

    return candidate
def assert_fingerprint(cert, fingerprint):
    """
    Checks if given fingerprint matches the supplied certificate.

    :param cert:
        Certificate as bytes object.
    :param fingerprint:
        Fingerprint as string of hexdigits, can be interspersed by colons.
    :raises SSLError: if the fingerprint length matches neither an MD5 nor a
        SHA-1 digest, or if the digests differ.
    """

    # Maps the length of a digest to a possible hash function producing
    # this digest.
    hashfunc_map = {
        16: md5,
        20: sha1
    }

    fingerprint = fingerprint.replace(':', '').lower()

    # Two hex digits per byte; an odd count cannot be a whole digest.
    digest_length, rest = divmod(len(fingerprint), 2)

    if rest or digest_length not in hashfunc_map:
        raise SSLError('Fingerprint is of invalid length.')

    # We need encode() here for py32; works on py2 and p33.
    fingerprint_bytes = unhexlify(fingerprint.encode())

    hashfunc = hashfunc_map[digest_length]

    cert_digest = hashfunc(cert).digest()

    if not cert_digest == fingerprint_bytes:
        raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
                       .format(hexlify(fingerprint_bytes),
                               hexlify(cert_digest)))
def is_fp_closed(obj):
    """
    Checks whether a given file-like object is closed.

    :param obj:
        The file-like object to check.
    :return: ``True`` if the object is closed, ``False`` otherwise.
    """
    if hasattr(obj, 'fp'):
        # Object is a container for another file-like object that gets released
        # on exhaustion (e.g. HTTPResponse)
        return obj.fp is None

    return obj.closed
if SSLContext is not None:  # Python 3.2+
    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                        ca_certs=None, server_hostname=None,
                        ssl_version=None):
        """
        All arguments except `server_hostname` have the same meaning as for
        :func:`ssl.wrap_socket`

        :param server_hostname:
            Hostname of the expected certificate
        :raises SSLError: if ``ca_certs`` cannot be loaded.
        """
        context = SSLContext(ssl_version)
        context.verify_mode = cert_reqs

        # Disable TLS compression to mitigate CRIME attack (issue #309)
        OP_NO_COMPRESSION = 0x20000
        context.options |= OP_NO_COMPRESSION

        if ca_certs:
            try:
                context.load_verify_locations(ca_certs)
            # Py32 raises IOError
            # Py33 raises FileNotFoundError
            except Exception as e:  # Reraise as SSLError
                raise SSLError(e)
        if certfile:
            # FIXME: This block needs a test.
            context.load_cert_chain(certfile, keyfile)
        if HAS_SNI:  # Platform-specific: OpenSSL with enabled SNI
            return context.wrap_socket(sock, server_hostname=server_hostname)
        return context.wrap_socket(sock)

else:  # Python 3.1 and earlier
    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                        ca_certs=None, server_hostname=None,
                        ssl_version=None):
        """
        Fallback for interpreters without ``SSLContext``; delegates to
        ``wrap_socket``. ``server_hostname`` is accepted for signature
        compatibility but is not passed on.
        """
        return wrap_socket(sock, keyfile=keyfile, certfile=certfile,
                           ca_certs=ca_certs, cert_reqs=cert_reqs,
                           ssl_version=ssl_version)