gerrit.onap Code Review - sdc/sdc-distribution-client.git/blob

   1 # -*- coding: utf-8 -*-
   2
   3 """
   4 requests.utils
   5 ~~~~~~~~~~~~~~
   6
   7 This module provides utility functions that are used within Requests
   8 that are also useful for external consumption.
   9
  10 """
  11
  12 import cgi
  13 import codecs
  14 import collections
  15 import io
  16 import os
  17 import re
  18 import socket
  19 import struct
  20 import warnings
  21
  22 from . import __version__
  23 from . import certs
  24 from .compat import parse_http_list as _parse_list_header
  25 from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2,
  26                      builtin_str, getproxies, proxy_bypass, urlunparse,
  27                      basestring)
  28 from .cookies import RequestsCookieJar, cookiejar_from_dict
  29 from .structures import CaseInsensitiveDict
  30 from .exceptions import InvalidURL, FileModeWarning
  31
# Presumably exists only so that static checkers (pyflakes) see a use of the
# imported RequestsCookieJar name -- nothing in this module reads the tuple.
_hush_pyflakes = (RequestsCookieJar,)

# File names that get_netrc_auth() looks for in the user's home directory,
# tried in this order.
NETRC_FILES = ('.netrc', '_netrc')

# Value of certs.where(), evaluated once when this module is imported.
DEFAULT_CA_BUNDLE_PATH = certs.where()
  37
  38
  39 def dict_to_sequence(d):
  40     """Returns an internal sequence dictionary update."""
  41
  42     if hasattr(d, 'items'):
  43         d = d.items()
  44
  45     return d
  46
  47
  48 def super_len(o):
  49     total_length = 0
  50     current_position = 0
  51
  52     if hasattr(o, '__len__'):
  53         total_length = len(o)
  54
  55     elif hasattr(o, 'len'):
  56         total_length = o.len
  57
  58     elif hasattr(o, 'getvalue'):
  59         # e.g. BytesIO, cStringIO.StringIO
  60         total_length = len(o.getvalue())
  61
  62     elif hasattr(o, 'fileno'):
  63         try:
  64             fileno = o.fileno()
  65         except io.UnsupportedOperation:
  66             pass
  67         else:
  68             total_length = os.fstat(fileno).st_size
  69
  70             # Having used fstat to determine the file length, we need to
  71             # confirm that this file was opened up in binary mode.
  72             if 'b' not in o.mode:
  73                 warnings.warn((
  74                     "Requests has determined the content-length for this "
  75                     "request using the binary size of the file: however, the "
  76                     "file has been opened in text mode (i.e. without the 'b' "
  77                     "flag in the mode). This may lead to an incorrect "
  78                     "content-length. In Requests 3.0, support will be removed "
  79                     "for files in text mode."),
  80                     FileModeWarning
  81                 )
  82
  83     if hasattr(o, 'tell'):
  84         try:
  85             current_position = o.tell()
  86         except (OSError, IOError):
  87             # This can happen in some weird situations, such as when the file
  88             # is actually a special file descriptor like stdin. In this
  89             # instance, we don't know what the length is, so set it to zero and
  90             # let requests chunk it instead.
  91             current_position = total_length
  92
  93     return max(0, total_length - current_position)
  94
  95
  96 def get_netrc_auth(url, raise_errors=False):
  97     """Returns the Requests tuple auth for a given url from netrc."""
  98
  99     try:
 100         from netrc import netrc, NetrcParseError
 101
 102         netrc_path = None
 103
 104         for f in NETRC_FILES:
 105             try:
 106                 loc = os.path.expanduser('~/{0}'.format(f))
 107             except KeyError:
 108                 # os.path.expanduser can fail when $HOME is undefined and
 109                 # getpwuid fails. See http://bugs.python.org/issue20164 &
 110                 # https://github.com/kennethreitz/requests/issues/1846
 111                 return
 112
 113             if os.path.exists(loc):
 114                 netrc_path = loc
 115                 break
 116
 117         # Abort early if there isn't one.
 118         if netrc_path is None:
 119             return
 120
 121         ri = urlparse(url)
 122
 123         # Strip port numbers from netloc. This weird `if...encode`` dance is
 124         # used for Python 3.2, which doesn't support unicode literals.
 125         splitstr = b':'
 126         if isinstance(url, str):
 127             splitstr = splitstr.decode('ascii')
 128         host = ri.netloc.split(splitstr)[0]
 129
 130         try:
 131             _netrc = netrc(netrc_path).authenticators(host)
 132             if _netrc:
 133                 # Return with login / password
 134                 login_i = (0 if _netrc[0] else 1)
 135                 return (_netrc[login_i], _netrc[2])
 136         except (NetrcParseError, IOError):
 137             # If there was a parsing error or a permissions issue reading the file,
 138             # we'll just skip netrc auth unless explicitly asked to raise errors.
 139             if raise_errors:
 140                 raise
 141
 142     # AppEngine hackiness.
 143     except (ImportError, AttributeError):
 144         pass
 145
 146
 147 def guess_filename(obj):
 148     """Tries to guess the filename of the given object."""
 149     name = getattr(obj, 'name', None)
 150     if (name and isinstance(name, basestring) and name[0] != '<' and
 151             name[-1] != '>'):
 152         return os.path.basename(name)
 153
 154
 155 def from_key_val_list(value):
 156     """Take an object and test to see if it can be represented as a
 157     dictionary. Unless it can not be represented as such, return an
 158     OrderedDict, e.g.,
 159
 160     ::
 161
 162         >>> from_key_val_list([('key', 'val')])
 163         OrderedDict([('key', 'val')])
 164         >>> from_key_val_list('string')
 165         ValueError: need more than 1 value to unpack
 166         >>> from_key_val_list({'key': 'val'})
 167         OrderedDict([('key', 'val')])
 168     """
 169     if value is None:
 170         return None
 171
 172     if isinstance(value, (str, bytes, bool, int)):
 173         raise ValueError('cannot encode objects that are not 2-tuples')
 174
 175     return OrderedDict(value)
 176
 177
 178 def to_key_val_list(value):
 179     """Take an object and test to see if it can be represented as a
 180     dictionary. If it can be, return a list of tuples, e.g.,
 181
 182     ::
 183
 184         >>> to_key_val_list([('key', 'val')])
 185         [('key', 'val')]
 186         >>> to_key_val_list({'key': 'val'})
 187         [('key', 'val')]
 188         >>> to_key_val_list('string')
 189         ValueError: cannot encode objects that are not 2-tuples.
 190     """
 191     if value is None:
 192         return None
 193
 194     if isinstance(value, (str, bytes, bool, int)):
 195         raise ValueError('cannot encode objects that are not 2-tuples')
 196
 197     if isinstance(value, collections.Mapping):
 198         value = value.items()
 199
 200     return list(value)
 201
 202
 203 # From mitsuhiko/werkzeug (used with permission).
 204 def parse_list_header(value):
 205     """Parse lists as described by RFC 2068 Section 2.
 206
 207     In particular, parse comma-separated lists where the elements of
 208     the list may include quoted-strings.  A quoted-string could
 209     contain a comma.  A non-quoted string could have quotes in the
 210     middle.  Quotes are removed automatically after parsing.
 211
 212     It basically works like :func:`parse_set_header` just that items
 213     may appear multiple times and case sensitivity is preserved.
 214
 215     The return value is a standard :class:`list`:
 216
 217     >>> parse_list_header('token, "quoted value"')
 218     ['token', 'quoted value']
 219
 220     To create a header from the :class:`list` again, use the
 221     :func:`dump_header` function.
 222
 223     :param value: a string with a list header.
 224     :return: :class:`list`
 225     """
 226     result = []
 227     for item in _parse_list_header(value):
 228         if item[:1] == item[-1:] == '"':
 229             item = unquote_header_value(item[1:-1])
 230         result.append(item)
 231     return result
 232
 233
 234 # From mitsuhiko/werkzeug (used with permission).
 235 def parse_dict_header(value):
 236     """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
 237     convert them into a python dict:
 238
 239     >>> d = parse_dict_header('foo="is a fish", bar="as well"')
 240     >>> type(d) is dict
 241     True
 242     >>> sorted(d.items())
 243     [('bar', 'as well'), ('foo', 'is a fish')]
 244
 245     If there is no value for a key it will be `None`:
 246
 247     >>> parse_dict_header('key_without_value')
 248     {'key_without_value': None}
 249
 250     To create a header from the :class:`dict` again, use the
 251     :func:`dump_header` function.
 252
 253     :param value: a string with a dict header.
 254     :return: :class:`dict`
 255     """
 256     result = {}
 257     for item in _parse_list_header(value):
 258         if '=' not in item:
 259             result[item] = None
 260             continue
 261         name, value = item.split('=', 1)
 262         if value[:1] == value[-1:] == '"':
 263             value = unquote_header_value(value[1:-1])
 264         result[name] = value
 265     return result
 266
 267
 268 # From mitsuhiko/werkzeug (used with permission).
 269 def unquote_header_value(value, is_filename=False):
 270     r"""Unquotes a header value.  (Reversal of :func:`quote_header_value`).
 271     This does not use the real unquoting but what browsers are actually
 272     using for quoting.
 273
 274     :param value: the header value to unquote.
 275     """
 276     if value and value[0] == value[-1] == '"':
 277         # this is not the real unquoting, but fixing this so that the
 278         # RFC is met will result in bugs with internet explorer and
 279         # probably some other browsers as well.  IE for example is
 280         # uploading files with "C:\foo\bar.txt" as filename
 281         value = value[1:-1]
 282
 283         # if this is a filename and the starting characters look like
 284         # a UNC path, then just return the value without quotes.  Using the
 285         # replace sequence below on a UNC path has the effect of turning
 286         # the leading double slash into a single slash and then
 287         # _fix_ie_filename() doesn't work correctly.  See #458.
 288         if not is_filename or value[:2] != '\\\\':
 289             return value.replace('\\\\', '\\').replace('\\"', '"')
 290     return value
 291
 292
 293 def dict_from_cookiejar(cj):
 294     """Returns a key/value dictionary from a CookieJar.
 295
 296     :param cj: CookieJar object to extract cookies from.
 297     """
 298
 299     cookie_dict = {}
 300
 301     for cookie in cj:
 302         cookie_dict[cookie.name] = cookie.value
 303
 304     return cookie_dict
 305
 306
 307 def add_dict_to_cookiejar(cj, cookie_dict):
 308     """Returns a CookieJar from a key/value dictionary.
 309
 310     :param cj: CookieJar to insert cookies into.
 311     :param cookie_dict: Dict of key/values to insert into CookieJar.
 312     """
 313
 314     cj2 = cookiejar_from_dict(cookie_dict)
 315     cj.update(cj2)
 316     return cj
 317
 318
 319 def get_encodings_from_content(content):
 320     """Returns encodings from given content string.
 321
 322     :param content: bytestring to extract encodings from.
 323     """
 324     warnings.warn((
 325         'In requests 3.0, get_encodings_from_content will be removed. For '
 326         'more information, please see the discussion on issue #2266. (This'
 327         ' warning should only appear once.)'),
 328         DeprecationWarning)
 329
 330     charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
 331     pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
 332     xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')
 333
 334     return (charset_re.findall(content) +
 335             pragma_re.findall(content) +
 336             xml_re.findall(content))
 337
 338
 339 def get_encoding_from_headers(headers):
 340     """Returns encodings from given HTTP Header Dict.
 341
 342     :param headers: dictionary to extract encoding from.
 343     """
 344
 345     content_type = headers.get('content-type')
 346
 347     if not content_type:
 348         return None
 349
 350     content_type, params = cgi.parse_header(content_type)
 351
 352     if 'charset' in params:
 353         return params['charset'].strip("'\"")
 354
 355     if 'text' in content_type:
 356         return 'ISO-8859-1'
 357
 358
 359 def stream_decode_response_unicode(iterator, r):
 360     """Stream decodes a iterator."""
 361
 362     if r.encoding is None:
 363         for item in iterator:
 364             yield item
 365         return
 366
 367     decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
 368     for chunk in iterator:
 369         rv = decoder.decode(chunk)
 370         if rv:
 371             yield rv
 372     rv = decoder.decode(b'', final=True)
 373     if rv:
 374         yield rv
 375
 376
 377 def iter_slices(string, slice_length):
 378     """Iterate over slices of a string."""
 379     pos = 0
 380     while pos < len(string):
 381         yield string[pos:pos + slice_length]
 382         pos += slice_length
 383
 384
 385 def get_unicode_from_response(r):
 386     """Returns the requested content back in unicode.
 387
 388     :param r: Response object to get unicode content from.
 389
 390     Tried:
 391
 392     1. charset from content-type
 393     2. fall back and replace all unicode characters
 394
 395     """
 396     warnings.warn((
 397         'In requests 3.0, get_unicode_from_response will be removed. For '
 398         'more information, please see the discussion on issue #2266. (This'
 399         ' warning should only appear once.)'),
 400         DeprecationWarning)
 401
 402     tried_encodings = []
 403
 404     # Try charset from content-type
 405     encoding = get_encoding_from_headers(r.headers)
 406
 407     if encoding:
 408         try:
 409             return str(r.content, encoding)
 410         except UnicodeError:
 411             tried_encodings.append(encoding)
 412
 413     # Fall back:
 414     try:
 415         return str(r.content, encoding, errors='replace')
 416     except TypeError:
 417         return r.content
 418
 419
 420 # The unreserved URI characters (RFC 3986)
 421 UNRESERVED_SET = frozenset(
 422     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
 423     + "0123456789-._~")
 424
 425
 426 def unquote_unreserved(uri):
 427     """Un-escape any percent-escape sequences in a URI that are unreserved
 428     characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
 429     """
 430     parts = uri.split('%')
 431     for i in range(1, len(parts)):
 432         h = parts[i][0:2]
 433         if len(h) == 2 and h.isalnum():
 434             try:
 435                 c = chr(int(h, 16))
 436             except ValueError:
 437                 raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)
 438
 439             if c in UNRESERVED_SET:
 440                 parts[i] = c + parts[i][2:]
 441             else:
 442                 parts[i] = '%' + parts[i]
 443         else:
 444             parts[i] = '%' + parts[i]
 445     return ''.join(parts)
 446
 447
 448 def requote_uri(uri):
 449     """Re-quote the given URI.
 450
 451     This function passes the given URI through an unquote/quote cycle to
 452     ensure that it is fully and consistently quoted.
 453     """
 454     safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
 455     safe_without_percent = "!#$&'()*+,/:;=?@[]~"
 456     try:
 457         # Unquote only the unreserved characters
 458         # Then quote only illegal characters (do not quote reserved,
 459         # unreserved, or '%')
 460         return quote(unquote_unreserved(uri), safe=safe_with_percent)
 461     except InvalidURL:
 462         # We couldn't unquote the given URI, so let's try quoting it, but
 463         # there may be unquoted '%'s in the URI. We need to make sure they're
 464         # properly quoted so they do not cause issues elsewhere.
 465         return quote(uri, safe=safe_without_percent)
 466
 467
 468 def address_in_network(ip, net):
 469     """
 470     This function allows you to check if on IP belongs to a network subnet
 471     Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
 472              returns False if ip = 192.168.1.1 and net = 192.168.100.0/24
 473     """
 474     ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
 475     netaddr, bits = net.split('/')
 476     netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
 477     network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
 478     return (ipaddr & netmask) == (network & netmask)
 479
 480
 481 def dotted_netmask(mask):
 482     """
 483     Converts mask from /xx format to xxx.xxx.xxx.xxx
 484     Example: if mask is 24 function returns 255.255.255.0
 485     """
 486     bits = 0xffffffff ^ (1 << 32 - mask) - 1
 487     return socket.inet_ntoa(struct.pack('>I', bits))
 488
 489
 490 def is_ipv4_address(string_ip):
 491     try:
 492         socket.inet_aton(string_ip)
 493     except socket.error:
 494         return False
 495     return True
 496
 497
 498 def is_valid_cidr(string_network):
 499     """Very simple check of the cidr format in no_proxy variable"""
 500     if string_network.count('/') == 1:
 501         try:
 502             mask = int(string_network.split('/')[1])
 503         except ValueError:
 504             return False
 505
 506         if mask < 1 or mask > 32:
 507             return False
 508
 509         try:
 510             socket.inet_aton(string_network.split('/')[0])
 511         except socket.error:
 512             return False
 513     else:
 514         return False
 515     return True
 516
 517
 518 def should_bypass_proxies(url):
 519     """
 520     Returns whether we should bypass proxies or not.
 521     """
 522     get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())
 523
 524     # First check whether no_proxy is defined. If it is, check that the URL
 525     # we're getting isn't in the no_proxy list.
 526     no_proxy = get_proxy('no_proxy')
 527     netloc = urlparse(url).netloc
 528
 529     if no_proxy:
 530         # We need to check whether we match here. We need to see if we match
 531         # the end of the netloc, both with and without the port.
 532         no_proxy = (
 533             host for host in no_proxy.replace(' ', '').split(',') if host
 534         )
 535
 536         ip = netloc.split(':')[0]
 537         if is_ipv4_address(ip):
 538             for proxy_ip in no_proxy:
 539                 if is_valid_cidr(proxy_ip):
 540                     if address_in_network(ip, proxy_ip):
 541                         return True
 542         else:
 543             for host in no_proxy:
 544                 if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
 545                     # The URL does match something in no_proxy, so we don't want
 546                     # to apply the proxies on this URL.
 547                     return True
 548
 549     # If the system proxy settings indicate that this URL should be bypassed,
 550     # don't proxy.
 551     # The proxy_bypass function is incredibly buggy on OS X in early versions
 552     # of Python 2.6, so allow this call to fail. Only catch the specific
 553     # exceptions we've seen, though: this call failing in other ways can reveal
 554     # legitimate problems.
 555     try:
 556         bypass = proxy_bypass(netloc)
 557     except (TypeError, socket.gaierror):
 558         bypass = False
 559
 560     if bypass:
 561         return True
 562
 563     return False
 564
 565
 566 def get_environ_proxies(url):
 567     """Return a dict of environment proxies."""
 568     if should_bypass_proxies(url):
 569         return {}
 570     else:
 571         return getproxies()
 572
 573
 574 def select_proxy(url, proxies):
 575     """Select a proxy for the url, if applicable.
 576
 577     :param url: The url being for the request
 578     :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
 579     """
 580     proxies = proxies or {}
 581     urlparts = urlparse(url)
 582     if urlparts.hostname is None:
 583         proxy = None
 584     else:
 585         proxy = proxies.get(urlparts.scheme+'://'+urlparts.hostname)
 586     if proxy is None:
 587         proxy = proxies.get(urlparts.scheme)
 588     return proxy
 589
 590
 591 def default_user_agent(name="python-requests"):
 592     """Return a string representing the default user agent."""
 593     return '%s/%s' % (name, __version__)
 594
 595
 596 def default_headers():
 597     return CaseInsensitiveDict({
 598         'User-Agent': default_user_agent(),
 599         'Accept-Encoding': ', '.join(('gzip', 'deflate')),
 600         'Accept': '*/*',
 601         'Connection': 'keep-alive',
 602     })
 603
 604
 605 def parse_header_links(value):
 606     """Return a dict of parsed link headers proxies.
 607
 608     i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"
 609
 610     """
 611
 612     links = []
 613
 614     replace_chars = ' \'"'
 615
 616     for val in re.split(', *<', value):
 617         try:
 618             url, params = val.split(';', 1)
 619         except ValueError:
 620             url, params = val, ''
 621
 622         link = {'url': url.strip('<> \'"')}
 623
 624         for param in params.split(';'):
 625             try:
 626                 key, value = param.split('=')
 627             except ValueError:
 628                 break
 629
 630             link[key.strip(replace_chars)] = value.strip(replace_chars)
 631
 632         links.append(link)
 633
 634     return links
 635
 636
 637 # Null bytes; no need to recreate these on each call to guess_json_utf
 638 _null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
 639 _null2 = _null * 2
 640 _null3 = _null * 3
 641
 642
 643 def guess_json_utf(data):
 644     # JSON always starts with two ASCII characters, so detection is as
 645     # easy as counting the nulls and from their location and count
 646     # determine the encoding. Also detect a BOM, if present.
 647     sample = data[:4]
 648     if sample in (codecs.BOM_UTF32_LE, codecs.BOM32_BE):
 649         return 'utf-32'     # BOM included
 650     if sample[:3] == codecs.BOM_UTF8:
 651         return 'utf-8-sig'  # BOM included, MS style (discouraged)
 652     if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
 653         return 'utf-16'     # BOM included
 654     nullcount = sample.count(_null)
 655     if nullcount == 0:
 656         return 'utf-8'
 657     if nullcount == 2:
 658         if sample[::2] == _null2:   # 1st and 3rd are null
 659             return 'utf-16-be'
 660         if sample[1::2] == _null2:  # 2nd and 4th are null
 661             return 'utf-16-le'
 662         # Did not detect 2 valid UTF-16 ascii-range characters
 663     if nullcount == 3:
 664         if sample[:3] == _null3:
 665             return 'utf-32-be'
 666         if sample[1:] == _null3:
 667             return 'utf-32-le'
 668         # Did not detect a valid UTF-32 ascii-range character
 669     return None
 670
 671
 672 def prepend_scheme_if_needed(url, new_scheme):
 673     """Given a URL that may or may not have a scheme, prepend the given scheme.
 674     Does not replace a present scheme with the one provided as an argument."""
 675     scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)
 676
 677     # urlparse is a finicky beast, and sometimes decides that there isn't a
 678     # netloc present. Assume that it's being over-cautious, and switch netloc
 679     # and path if urlparse decided there was no netloc.
 680     if not netloc:
 681         netloc, path = path, netloc
 682
 683     return urlunparse((scheme, netloc, path, params, query, fragment))
 684
 685
 686 def get_auth_from_url(url):
 687     """Given a url with authentication components, extract them into a tuple of
 688     username,password."""
 689     parsed = urlparse(url)
 690
 691     try:
 692         auth = (unquote(parsed.username), unquote(parsed.password))
 693     except (AttributeError, TypeError):
 694         auth = ('', '')
 695
 696     return auth
 697
 698
 699 def to_native_string(string, encoding='ascii'):
 700     """
 701     Given a string object, regardless of type, returns a representation of that
 702     string in the native string type, encoding and decoding where necessary.
 703     This assumes ASCII unless told otherwise.
 704     """
 705     if isinstance(string, builtin_str):
 706         out = string
 707     else:
 708         if is_py2:
 709             out = string.encode(encoding)
 710         else:
 711             out = string.decode(encoding)
 712
 713     return out
 714
 715
 716 def urldefragauth(url):
 717     """
 718     Given a url remove the fragment and the authentication part
 719     """
 720     scheme, netloc, path, params, query, fragment = urlparse(url)
 721
 722     # see func:`prepend_scheme_if_needed`
 723     if not netloc:
 724         netloc, path = path, netloc
 725
 726     netloc = netloc.rsplit('@', 1)[-1]
 727
 728     return urlunparse((scheme, netloc, path, params, query, ''))