gerrit.onap Code Review - sdc/sdc-distribution-client.git/blob

   1 from __future__ import absolute_import
   2 from contextlib import contextmanager
   3 import zlib
   4 import io
   5 from socket import timeout as SocketTimeout
   6 from socket import error as SocketError
   7
   8 from ._collections import HTTPHeaderDict
   9 from .exceptions import (
  10     ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked
  11 )
  12 from .packages.six import string_types as basestring, binary_type, PY3
  13 from .packages.six.moves import http_client as httplib
  14 from .connection import HTTPException, BaseSSLError
  15 from .util.response import is_fp_closed, is_response_to_head
  16
  17
  18 class DeflateDecoder(object):
  19
  20     def __init__(self):
  21         self._first_try = True
  22         self._data = binary_type()
  23         self._obj = zlib.decompressobj()
  24
  25     def __getattr__(self, name):
  26         return getattr(self._obj, name)
  27
  28     def decompress(self, data):
  29         if not data:
  30             return data
  31
  32         if not self._first_try:
  33             return self._obj.decompress(data)
  34
  35         self._data += data
  36         try:
  37             return self._obj.decompress(data)
  38         except zlib.error:
  39             self._first_try = False
  40             self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
  41             try:
  42                 return self.decompress(self._data)
  43             finally:
  44                 self._data = None
  45
  46
  47 class GzipDecoder(object):
  48
  49     def __init__(self):
  50         self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
  51
  52     def __getattr__(self, name):
  53         return getattr(self._obj, name)
  54
  55     def decompress(self, data):
  56         if not data:
  57             return data
  58         return self._obj.decompress(data)
  59
  60
  61 def _get_decoder(mode):
  62     if mode == 'gzip':
  63         return GzipDecoder()
  64
  65     return DeflateDecoder()
  66
  67
  68 class HTTPResponse(io.IOBase):
  69     """
  70     HTTP Response container.
  71
  72     Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
  73     loaded and decoded on-demand when the ``data`` property is accessed.  This
  74     class is also compatible with the Python standard library's :mod:`io`
  75     module, and can hence be treated as a readable object in the context of that
  76     framework.
  77
  78     Extra parameters for behaviour not present in httplib.HTTPResponse:
  79
  80     :param preload_content:
  81         If True, the response's body will be preloaded during construction.
  82
  83     :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header (like 'gzip' and 'deflate'). If False, the
        raw data will be used instead.
  87
  88     :param original_response:
  89         When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
  90         object, it's convenient to include the original for debug purposes. It's
  91         otherwise unused.
  92     """
  93
  94     CONTENT_DECODERS = ['gzip', 'deflate']
  95     REDIRECT_STATUSES = [301, 302, 303, 307, 308]
  96
  97     def __init__(self, body='', headers=None, status=0, version=0, reason=None,
  98                  strict=0, preload_content=True, decode_content=True,
  99                  original_response=None, pool=None, connection=None):
 100
 101         if isinstance(headers, HTTPHeaderDict):
 102             self.headers = headers
 103         else:
 104             self.headers = HTTPHeaderDict(headers)
 105         self.status = status
 106         self.version = version
 107         self.reason = reason
 108         self.strict = strict
 109         self.decode_content = decode_content
 110
 111         self._decoder = None
 112         self._body = None
 113         self._fp = None
 114         self._original_response = original_response
 115         self._fp_bytes_read = 0
 116
 117         if body and isinstance(body, (basestring, binary_type)):
 118             self._body = body
 119
 120         self._pool = pool
 121         self._connection = connection
 122
 123         if hasattr(body, 'read'):
 124             self._fp = body
 125
 126         # Are we using the chunked-style of transfer encoding?
 127         self.chunked = False
 128         self.chunk_left = None
 129         tr_enc = self.headers.get('transfer-encoding', '').lower()
 130         # Don't incur the penalty of creating a list and then discarding it
 131         encodings = (enc.strip() for enc in tr_enc.split(","))
 132         if "chunked" in encodings:
 133             self.chunked = True
 134
 135         # If requested, preload the body.
 136         if preload_content and not self._body:
 137             self._body = self.read(decode_content=decode_content)
 138
 139     def get_redirect_location(self):
 140         """
 141         Should we redirect and where to?
 142
 143         :returns: Truthy redirect location string if we got a redirect status
 144             code and valid location. ``None`` if redirect status and no
 145             location. ``False`` if not a redirect status code.
 146         """
 147         if self.status in self.REDIRECT_STATUSES:
 148             return self.headers.get('location')
 149
 150         return False
 151
 152     def release_conn(self):
 153         if not self._pool or not self._connection:
 154             return
 155
 156         self._pool._put_conn(self._connection)
 157         self._connection = None
 158
 159     @property
 160     def data(self):
 161         # For backwords-compat with earlier urllib3 0.4 and earlier.
 162         if self._body:
 163             return self._body
 164
 165         if self._fp:
 166             return self.read(cache_content=True)
 167
 168     def tell(self):
 169         """
 170         Obtain the number of bytes pulled over the wire so far. May differ from
 171         the amount of content returned by :meth:``HTTPResponse.read`` if bytes
 172         are encoded on the wire (e.g, compressed).
 173         """
 174         return self._fp_bytes_read
 175
 176     def _init_decoder(self):
 177         """
 178         Set-up the _decoder attribute if necessar.
 179         """
 180         # Note: content-encoding value should be case-insensitive, per RFC 7230
 181         # Section 3.2
 182         content_encoding = self.headers.get('content-encoding', '').lower()
 183         if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
 184             self._decoder = _get_decoder(content_encoding)
 185
 186     def _decode(self, data, decode_content, flush_decoder):
 187         """
 188         Decode the data passed in and potentially flush the decoder.
 189         """
 190         try:
 191             if decode_content and self._decoder:
 192                 data = self._decoder.decompress(data)
 193         except (IOError, zlib.error) as e:
 194             content_encoding = self.headers.get('content-encoding', '').lower()
 195             raise DecodeError(
 196                 "Received response with content-encoding: %s, but "
 197                 "failed to decode it." % content_encoding, e)
 198
 199         if flush_decoder and decode_content:
 200             data += self._flush_decoder()
 201
 202         return data
 203
 204     def _flush_decoder(self):
 205         """
 206         Flushes the decoder. Should only be called if the decoder is actually
 207         being used.
 208         """
 209         if self._decoder:
 210             buf = self._decoder.decompress(b'')
 211             return buf + self._decoder.flush()
 212
 213         return b''
 214
    @contextmanager
    def _error_catcher(self):
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        Translations applied to exceptions raised inside the ``with`` body:

        * ``SocketTimeout`` becomes ``ReadTimeoutError``.
        * ``BaseSSLError`` becomes ``ReadTimeoutError`` only when its message
          contains 'read operation timed out'; any other SSL error is
          re-raised unchanged.
        * ``HTTPException`` and ``SocketError`` become ``ProtocolError``.

        If the body exits with any exception, the original response and the
        held connection (when present) are closed. In all cases, if the
        original response is held and reports itself closed, the connection
        is released back to the pool.
        """
        # Flipped to True only when the ``with`` body finishes without raising.
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, 'Read timed out.')

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if 'read operation timed out' not in str(e):  # Defensive:
                    # This shouldn't happen but just in case we're missing an edge
                    # case, let's avoid swallowing SSL errors.
                    raise

                raise ReadTimeoutError(self._pool, None, 'Read timed out.')

            except (HTTPException, SocketError) as e:
                # This includes IncompleteRead.
                raise ProtocolError('Connection broken: %r' % e, e)

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
 271
 272     def read(self, amt=None, decode_content=None, cache_content=False):
 273         """
 274         Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
 275         parameters: ``decode_content`` and ``cache_content``.
 276
 277         :param amt:
 278             How much of the content to read. If specified, caching is skipped
 279             because it doesn't make sense to cache partial content as the full
 280             response.
 281
 282         :param decode_content:
 283             If True, will attempt to decode the body based on the
 284             'content-encoding' header.
 285
 286         :param cache_content:
 287             If True, will save the returned data such that the same result is
 288             returned despite of the state of the underlying file object. This
 289             is useful if you want the ``.data`` property to continue working
 290             after having ``.read()`` the file object. (Overridden if ``amt`` is
 291             set.)
 292         """
 293         self._init_decoder()
 294         if decode_content is None:
 295             decode_content = self.decode_content
 296
 297         if self._fp is None:
 298             return
 299
 300         flush_decoder = False
 301         data = None
 302
 303         with self._error_catcher():
 304             if amt is None:
 305                 # cStringIO doesn't like amt=None
 306                 data = self._fp.read()
 307                 flush_decoder = True
 308             else:
 309                 cache_content = False
 310                 data = self._fp.read(amt)
 311                 if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
 312                     # Close the connection when no data is returned
 313                     #
 314                     # This is redundant to what httplib/http.client _should_
 315                     # already do.  However, versions of python released before
 316                     # December 15, 2012 (http://bugs.python.org/issue16298) do
 317                     # not properly close the connection in all cases. There is
 318                     # no harm in redundantly calling close.
 319                     self._fp.close()
 320                     flush_decoder = True
 321
 322         if data:
 323             self._fp_bytes_read += len(data)
 324
 325             data = self._decode(data, decode_content, flush_decoder)
 326
 327             if cache_content:
 328                 self._body = data
 329
 330         return data
 331
 332     def stream(self, amt=2**16, decode_content=None):
 333         """
 334         A generator wrapper for the read() method. A call will block until
 335         ``amt`` bytes have been read from the connection or until the
 336         connection is closed.
 337
 338         :param amt:
 339             How much of the content to read. The generator will return up to
 340             much data per iteration, but may return less. This is particularly
 341             likely when using compressed data. However, the empty string will
 342             never be returned.
 343
 344         :param decode_content:
 345             If True, will attempt to decode the body based on the
 346             'content-encoding' header.
 347         """
 348         if self.chunked:
 349             for line in self.read_chunked(amt, decode_content=decode_content):
 350                 yield line
 351         else:
 352             while not is_fp_closed(self._fp):
 353                 data = self.read(amt=amt, decode_content=decode_content)
 354
 355                 if data:
 356                     yield data
 357
 358     @classmethod
 359     def from_httplib(ResponseCls, r, **response_kw):
 360         """
 361         Given an :class:`httplib.HTTPResponse` instance ``r``, return a
 362         corresponding :class:`urllib3.response.HTTPResponse` object.
 363
 364         Remaining parameters are passed to the HTTPResponse constructor, along
 365         with ``original_response=r``.
 366         """
 367         headers = r.msg
 368
 369         if not isinstance(headers, HTTPHeaderDict):
 370             if PY3:  # Python 3
 371                 headers = HTTPHeaderDict(headers.items())
 372             else:  # Python 2
 373                 headers = HTTPHeaderDict.from_httplib(headers)
 374
 375         # HTTPResponse objects in Python 3 don't have a .strict attribute
 376         strict = getattr(r, 'strict', 0)
 377         resp = ResponseCls(body=r,
 378                            headers=headers,
 379                            status=r.status,
 380                            version=r.version,
 381                            reason=r.reason,
 382                            strict=strict,
 383                            original_response=r,
 384                            **response_kw)
 385         return resp
 386
 387     # Backwards-compatibility methods for httplib.HTTPResponse
 388     def getheaders(self):
 389         return self.headers
 390
 391     def getheader(self, name, default=None):
 392         return self.headers.get(name, default)
 393
 394     # Overrides from io.IOBase
 395     def close(self):
 396         if not self.closed:
 397             self._fp.close()
 398
 399         if self._connection:
 400             self._connection.close()
 401
 402     @property
 403     def closed(self):
 404         if self._fp is None:
 405             return True
 406         elif hasattr(self._fp, 'closed'):
 407             return self._fp.closed
 408         elif hasattr(self._fp, 'isclosed'):  # Python 2
 409             return self._fp.isclosed()
 410         else:
 411             return True
 412
 413     def fileno(self):
 414         if self._fp is None:
 415             raise IOError("HTTPResponse has no file to get a fileno from")
 416         elif hasattr(self._fp, "fileno"):
 417             return self._fp.fileno()
 418         else:
 419             raise IOError("The file-like object this HTTPResponse is wrapped "
 420                           "around has no file descriptor")
 421
 422     def flush(self):
 423         if self._fp is not None and hasattr(self._fp, 'flush'):
 424             return self._fp.flush()
 425
 426     def readable(self):
 427         # This method is required for `io` module compatibility.
 428         return True
 429
 430     def readinto(self, b):
 431         # This method is required for `io` module compatibility.
 432         temp = self.read(len(b))
 433         if len(temp) == 0:
 434             return 0
 435         else:
 436             b[:len(temp)] = temp
 437             return len(temp)
 438
 439     def _update_chunk_length(self):
 440         # First, we'll figure out length of a chunk and then
 441         # we'll try to read it from socket.
 442         if self.chunk_left is not None:
 443             return
 444         line = self._fp.fp.readline()
 445         line = line.split(b';', 1)[0]
 446         try:
 447             self.chunk_left = int(line, 16)
 448         except ValueError:
 449             # Invalid chunked protocol response, abort.
 450             self.close()
 451             raise httplib.IncompleteRead(line)
 452
 453     def _handle_chunk(self, amt):
 454         returned_chunk = None
 455         if amt is None:
 456             chunk = self._fp._safe_read(self.chunk_left)
 457             returned_chunk = chunk
 458             self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
 459             self.chunk_left = None
 460         elif amt < self.chunk_left:
 461             value = self._fp._safe_read(amt)
 462             self.chunk_left = self.chunk_left - amt
 463             returned_chunk = value
 464         elif amt == self.chunk_left:
 465             value = self._fp._safe_read(amt)
 466             self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
 467             self.chunk_left = None
 468             returned_chunk = value
 469         else:  # amt > self.chunk_left
 470             returned_chunk = self._fp._safe_read(self.chunk_left)
 471             self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
 472             self.chunk_left = None
 473         return returned_chunk
 474
 475     def read_chunked(self, amt=None, decode_content=None):
 476         """
 477         Similar to :meth:`HTTPResponse.read`, but with an additional
 478         parameter: ``decode_content``.
 479
 480         :param decode_content:
 481             If True, will attempt to decode the body based on the
 482             'content-encoding' header.
 483         """
 484         self._init_decoder()
 485         # FIXME: Rewrite this method and make it a class with a better structured logic.
 486         if not self.chunked:
 487             raise ResponseNotChunked(
 488                 "Response is not chunked. "
 489                 "Header 'transfer-encoding: chunked' is missing.")
 490
 491         # Don't bother reading the body of a HEAD request.
 492         if self._original_response and is_response_to_head(self._original_response):
 493             self._original_response.close()
 494             return
 495
 496         with self._error_catcher():
 497             while True:
 498                 self._update_chunk_length()
 499                 if self.chunk_left == 0:
 500                     break
 501                 chunk = self._handle_chunk(amt)
 502                 decoded = self._decode(chunk, decode_content=decode_content,
 503                                        flush_decoder=False)
 504                 if decoded:
 505                     yield decoded
 506
 507             if decode_content:
 508                 # On CPython and PyPy, we should never need to flush the
 509                 # decoder. However, on Jython we *might* need to, so
 510                 # lets defensively do it anyway.
 511                 decoded = self._flush_decoder()
 512                 if decoded:  # Platform-specific: Jython.
 513                     yield decoded
 514
 515             # Chunk content ends with \r\n: discard it.
 516             while True:
 517                 line = self._fp.fp.readline()
 518                 if not line:
 519                     # Some sites may not end with '\r\n'.
 520                     break
 521                 if line == b'\r\n':
 522                     break
 523
 524             # We read everything; close the "file".
 525             if self._original_response:
 526                 self._original_response.close()