# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""

import cgi
import codecs
import collections
import io
import os
import re
import socket
import struct
import warnings

from . import __version__
from . import certs
from .compat import parse_http_list as _parse_list_header
from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2,
                     builtin_str, getproxies, proxy_bypass, urlunparse,
                     basestring)
from .cookies import RequestsCookieJar, cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import InvalidURL, FileModeWarning

_hush_pyflakes = (RequestsCookieJar,)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()


def dict_to_sequence(d):
    """Returns an internal sequence dictionary update."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    total_length = 0
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'getvalue'):
        # e.g. BytesIO, cStringIO.StringIO
        total_length = len(o.getvalue())

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so we treat the
            # current position as the total length: the remaining length comes
            # out as zero and requests will chunk the body instead.
            current_position = total_length

    return max(0, total_length - current_position)
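

# Illustrative usage (added here for clarity; not part of the original
# module): super_len reports the number of bytes remaining from the current
# read position, which is why it subtracts tell() from the total length.
#
#   >>> data = io.BytesIO(b'hello')
#   >>> super_len(data)
#   5
#   >>> _ = data.read(2)
#   >>> super_len(data)  # two bytes already consumed
#   3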


def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{0}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See http://bugs.python.org/issue20164 &
                # https://github.com/kennethreitz/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the
            # file, we'll just skip netrc auth unless explicitly asked to
            # raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass
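

# Illustrative sketch (added; the file contents below are hypothetical):
# given a ~/.netrc such as
#
#     machine example.com login alice password s3cret
#
# the credentials are looked up by hostname:
#
#   >>> get_netrc_auth('http://example.com/resource')
#   ('alice', 's3cret')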


def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)


def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        ValueError: need more than 1 value to unpack
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples.
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, collections.Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value
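

# Illustrative examples (added): browser-style unquoting strips the outer
# quotes and unescapes backslashes, except for quoted UNC filenames, which
# are returned with only the quotes removed.
#
#   >>> unquote_header_value('"a \\"quoted\\" part"')
#   'a "quoted" part'
#   >>> unquote_header_value('"\\\\server\\share"', is_filename=True)
#   '\\\\server\\share'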


def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    """

    cj2 = cookiejar_from_dict(cookie_dict)
    cj.update(cj2)
    return cj


def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))
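

# Illustrative example (added): the three regexes pick up <meta charset>
# declarations, http-equiv pragmas, and XML declarations, in that order.
#
#   >>> get_encodings_from_content('<meta charset="utf-8"><p>hi</p>')
#   ['utf-8']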


def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = cgi.parse_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'


def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length


def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tried:

    1. charset from content-type
    2. fall back and replace all unicode characters
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)


def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)
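

# Illustrative examples (added): the unquote/quote cycle normalizes stray
# unreserved escapes while leaving reserved ones intact.
#
#   >>> unquote_unreserved('%7Efoo%2Fbar')   # '~' is unreserved, '/' is not
#   '~foo%2Fbar'
#   >>> requote_uri('http://example.com/over there?q=a%2Fb')
#   'http://example.com/over%20there?q=a%2Fb'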


def address_in_network(ip, net):
    """
    This function allows you to check if an IP belongs to a network subnet
    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """
    Converts mask from /xx format to xxx.xxx.xxx.xxx
    Example: if mask is 24 function returns 255.255.255.0
    """
    bits = 0xffffffff ^ (1 << 32 - mask) - 1
    return socket.inet_ntoa(struct.pack('>I', bits))
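

# Worked example (added): for a /24 mask, the netmask math above is
# 0xffffffff ^ ((1 << 8) - 1) == 0xffffff00, i.e. 255.255.255.0.
#
#   >>> dotted_netmask(24)
#   '255.255.255.0'
#   >>> address_in_network('192.168.1.1', '192.168.1.0/24')
#   True
#   >>> address_in_network('192.168.1.1', '192.168.100.0/24')
#   False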


def is_ipv4_address(string_ip):
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """Very simple check of the cidr format in no_proxy variable"""
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True


def should_bypass_proxies(url):
    """
    Returns whether we should bypass proxies or not.
    """
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy = get_proxy('no_proxy')
    netloc = urlparse(url).netloc

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the netloc, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        ip = netloc.split(':')[0]
        if is_ipv4_address(ip):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(ip, proxy_ip):
                        return True
        else:
            for host in no_proxy:
                if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    # If the system proxy settings indicate that this URL should be bypassed,
    # check it.
    # The proxy_bypass function is incredibly buggy on OS X in early versions
    # of Python 2.6, so allow this call to fail. Only catch the specific
    # exceptions we've seen, though: this call failing in other ways can reveal
    # legitimate problems.
    try:
        bypass = proxy_bypass(netloc)
    except (TypeError, socket.gaierror):
        bypass = False

    if bypass:
        return True

    return False
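

# Illustrative sketch (added; it mutates os.environ, so it is only suitable
# for a throwaway session): hosts matching a CIDR block listed in no_proxy
# bypass the proxies.
#
#   >>> os.environ['no_proxy'] = '192.168.0.0/16,localhost'
#   >>> should_bypass_proxies('http://192.168.1.5/')
#   True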


def get_environ_proxies(url):
    """Return a dict of environment proxies."""
    if should_bypass_proxies(url):
        return {}
    else:
        return getproxies()


def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The url being requested
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        proxy = None
    else:
        proxy = proxies.get(urlparts.scheme + '://' + urlparts.hostname)
        if proxy is None:
            proxy = proxies.get(urlparts.scheme)
    return proxy
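

# Illustrative example (added): a 'scheme://host' key takes precedence over a
# bare scheme key.
#
#   >>> proxies = {'http': 'http://proxy:3128',
#   ...            'http://example.com': 'http://special:3128'}
#   >>> select_proxy('http://example.com/page', proxies)
#   'http://special:3128'
#   >>> select_proxy('http://other.org/', proxies)
#   'http://proxy:3128'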


def default_user_agent(name="python-requests"):
    """Return a string representing the default user agent."""
    return '%s/%s' % (name, __version__)


def default_headers():
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of dicts parsed from a Link header, e.g.

    Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"
    """

    links = []

    replace_chars = ' \'"'

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links
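

# Illustrative example (added): a paginated API's Link header parses into one
# dict per link (items sorted here so the output is order-independent).
#
#   >>> links = parse_header_links('<http://example.com/?page=2>; rel="next"')
#   >>> sorted(links[0].items())
#   [('rel', 'next'), ('url', 'http://example.com/?page=2')]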


# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
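

# Illustrative examples (added): the first four bytes of BOM-less UTF-16/32
# JSON contain a predictable pattern of null bytes.
#
#   >>> guess_json_utf(b'{"a": 1}')
#   'utf-8'
#   >>> guess_json_utf('{"a": 1}'.encode('utf-16-le'))  # 7b 00 22 00 ...
#   'utf-16-le'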


def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument."""
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))
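

# Illustrative example (added): urlparse('example.com/path', 'http') puts the
# host in `path`, which is why the swap above is needed.
#
#   >>> prepend_scheme_if_needed('example.com/path', 'http')
#   'http://example.com/path'
#   >>> prepend_scheme_if_needed('https://example.com', 'http')
#   'https://example.com'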


def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username,password."""
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth
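

# Illustrative examples (added): credentials are percent-decoded; URLs
# without credentials yield empty strings.
#
#   >>> get_auth_from_url('http://user:p%40ss@example.com/')
#   ('user', 'p@ss')
#   >>> get_auth_from_url('http://example.com/')
#   ('', '')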


def to_native_string(string, encoding='ascii'):
    """
    Given a string object, regardless of type, returns a representation of that
    string in the native string type, encoding and decoding where necessary.
    This assumes ASCII unless told otherwise.
    """
    if isinstance(string, builtin_str):
        out = string
    else:
        if is_py2:
            out = string.encode(encoding)
        else:
            out = string.decode(encoding)

    return out


def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))
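

# Illustrative example (added): both the userinfo and the fragment are
# stripped.
#
#   >>> urldefragauth('http://user:pass@example.com/path#frag')
#   'http://example.com/path'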