Update request urllib3 (#464)

* requests: updated to version 2.27.1 * urllib3: updated to version 1.26.18
2024-01-29 20:12:20 +01:00
parent 349752a380
commit b96b63448c
51 changed files with 2414 additions and 801 deletions
@@ -1,29 +1,38 @@
 from __future__ import absolute_import
-from contextlib import contextmanager
-import zlib
+
 import io
 import logging
-from socket import timeout as SocketTimeout
+import sys
+import warnings
+import zlib
+from contextlib import contextmanager
 from socket import error as SocketError
+from socket import timeout as SocketTimeout

 try:
-    import brotli
+    try:
+        import brotlicffi as brotli
+    except ImportError:
+        import brotli
 except ImportError:
    brotli = None

+from . import util
 from ._collections import HTTPHeaderDict
+from .connection import BaseSSLError, HTTPException
 from .exceptions import (
    BodyNotHttplibCompatible,
-    ProtocolError,
    DecodeError,
+    HTTPError,
+    IncompleteRead,
+    InvalidChunkLength,
+    InvalidHeader,
+    ProtocolError,
    ReadTimeoutError,
    ResponseNotChunked,
-    IncompleteRead,
-    InvalidHeader,
+    SSLError,
 )
-from .packages.six import string_types as basestring, PY3
-from .packages.six.moves import http_client as httplib
-from .connection import HTTPException, BaseSSLError
+from .packages import six
 from .util.response import is_fp_closed, is_response_to_head

 log = logging.getLogger(__name__)
@@ -106,11 +115,10 @@ if brotli is not None:
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self):
            self._obj = brotli.Decompressor()
-
-        def decompress(self, data):
            if hasattr(self._obj, "decompress"):
-                return self._obj.decompress(data)
-            return self._obj.process(data)
+                self.decompress = self._obj.decompress
+            else:
+                self.decompress = self._obj.process

        def flush(self):
            if hasattr(self._obj, "flush"):
@@ -156,13 +164,13 @@ class HTTPResponse(io.IOBase):
    """
    HTTP Response container.

-    Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
+    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed.  This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

-    Extra parameters for behaviour not present in httplib.HTTPResponse:
+    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.
@@ -172,7 +180,7 @@ class HTTPResponse(io.IOBase):
        'content-encoding' header.

    :param original_response:
-        When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
+        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

@@ -232,7 +240,7 @@ class HTTPResponse(io.IOBase):
        self.msg = msg
        self._request_url = request_url

-        if body and isinstance(body, (basestring, bytes)):
+        if body and isinstance(body, (six.string_types, bytes)):
            self._body = body

        self._pool = pool
@@ -277,9 +285,20 @@ class HTTPResponse(io.IOBase):
        self._pool._put_conn(self._connection)
        self._connection = None

+    def drain_conn(self):
+        """
+        Read and discard any remaining HTTP response data in the response connection.
+
+        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
+        """
+        try:
+            self.read()
+        except (HTTPError, SocketError, BaseSSLError, HTTPException):
+            pass
+
    @property
    def data(self):
-        # For backwords-compat with earlier urllib3 0.4 and earlier.
+        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body

@@ -296,8 +315,8 @@ class HTTPResponse(io.IOBase):
    def tell(self):
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
-        the amount of content returned by :meth:``HTTPResponse.read`` if bytes
-        are encoded on the wire (e.g, compressed).
+        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
+        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

@@ -431,10 +450,9 @@ class HTTPResponse(io.IOBase):

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
-                if "read operation timed out" not in str(e):  # Defensive:
-                    # This shouldn't happen but just in case we're missing an edge
-                    # case, let's avoid swallowing SSL errors.
-                    raise
+                if "read operation timed out" not in str(e):
+                    # SSL errors related to framing/MAC get wrapped and reraised here
+                    raise SSLError(e)

                raise ReadTimeoutError(self._pool, None, "Read timed out.")

@@ -466,9 +484,57 @@ class HTTPResponse(io.IOBase):
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

+    def _fp_read(self, amt):
+        """
+        Read a response with the thought that reading the number of bytes
+        larger than can fit in a 32-bit int at a time via SSL in some
+        known cases leads to an overflow error that has to be prevented
+        if `amt` or `self.length_remaining` indicate that a problem may
+        happen.
+
+        The known cases:
+          * 3.8 <= CPython < 3.9.7 because of a bug
+            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
+          * urllib3 injected with pyOpenSSL-backed SSL-support.
+          * CPython < 3.10 only when `amt` does not fit 32-bit int.
+        """
+        assert self._fp
+        c_int_max = 2 ** 31 - 1
+        if (
+            (
+                (amt and amt > c_int_max)
+                or (self.length_remaining and self.length_remaining > c_int_max)
+            )
+            and not util.IS_SECURETRANSPORT
+            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
+        ):
+            buffer = io.BytesIO()
+            # Besides `max_chunk_amt` being a maximum chunk size, it
+            # affects memory overhead of reading a response by this
+            # method in CPython.
+            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
+            # chunk size that does not lead to an overflow error, but
+            # 256 MiB is a compromise.
+            max_chunk_amt = 2 ** 28
+            while amt is None or amt != 0:
+                if amt is not None:
+                    chunk_amt = min(amt, max_chunk_amt)
+                    amt -= chunk_amt
+                else:
+                    chunk_amt = max_chunk_amt
+                data = self._fp.read(chunk_amt)
+                if not data:
+                    break
+                buffer.write(data)
+                del data  # to reduce peak memory usage by `max_chunk_amt`.
+            return buffer.getvalue()
+        else:
+            # StringIO doesn't like amt=None
+            return self._fp.read(amt) if amt is not None else self._fp.read()
+
    def read(self, amt=None, decode_content=None, cache_content=False):
        """
-        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
+        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
@@ -498,13 +564,11 @@ class HTTPResponse(io.IOBase):
        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
+            data = self._fp_read(amt) if not fp_closed else b""
            if amt is None:
-                # cStringIO doesn't like amt=None
-                data = self._fp.read() if not fp_closed else b""
                flush_decoder = True
            else:
                cache_content = False
-                data = self._fp.read(amt) if not fp_closed else b""
                if (
                    amt != 0 and not data
                ):  # Platform-specific: Buggy versions of Python.
@@ -569,7 +633,7 @@ class HTTPResponse(io.IOBase):
    @classmethod
    def from_httplib(ResponseCls, r, **response_kw):
        """
-        Given an :class:`httplib.HTTPResponse` instance ``r``, return a
+        Given an :class:`http.client.HTTPResponse` instance ``r``, return a
        corresponding :class:`urllib3.response.HTTPResponse` object.

        Remaining parameters are passed to the HTTPResponse constructor, along
@@ -578,11 +642,11 @@ class HTTPResponse(io.IOBase):
        headers = r.msg

        if not isinstance(headers, HTTPHeaderDict):
-            if PY3:
-                headers = HTTPHeaderDict(headers.items())
-            else:
+            if six.PY2:
                # Python 2.7
                headers = HTTPHeaderDict.from_httplib(headers)
+            else:
+                headers = HTTPHeaderDict(headers.items())

        # HTTPResponse objects in Python 3 don't have a .strict attribute
        strict = getattr(r, "strict", 0)
@@ -598,11 +662,23 @@ class HTTPResponse(io.IOBase):
        )
        return resp

-    # Backwards-compatibility methods for httplib.HTTPResponse
+    # Backwards-compatibility methods for http.client.HTTPResponse
    def getheaders(self):
+        warnings.warn(
+            "HTTPResponse.getheaders() is deprecated and will be removed "
+            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
+            category=DeprecationWarning,
+            stacklevel=2,
+        )
        return self.headers

    def getheader(self, name, default=None):
+        warnings.warn(
+            "HTTPResponse.getheader() is deprecated and will be removed "
+            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
+            category=DeprecationWarning,
+            stacklevel=2,
+        )
        return self.headers.get(name, default)

    # Backwards compatibility for http.cookiejar
@@ -668,8 +744,8 @@ class HTTPResponse(io.IOBase):
    def supports_chunked_reads(self):
        """
        Checks if the underlying file-like object looks like a
-        httplib.HTTPResponse object. We do this by testing for the fp
-        attribute. If it is present we assume it returns raw chunks as
+        :class:`http.client.HTTPResponse` object. We do this by testing for
+        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")
@@ -686,7 +762,7 @@ class HTTPResponse(io.IOBase):
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
-            raise httplib.IncompleteRead(line)
+            raise InvalidChunkLength(self, line)

    def _handle_chunk(self, amt):
        returned_chunk = None
@@ -733,7 +809,7 @@ class HTTPResponse(io.IOBase):
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
-                "Body should be httplib.HTTPResponse like. "
+                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

@@ -792,7 +868,7 @@ class HTTPResponse(io.IOBase):
            return self._request_url

    def __iter__(self):
-        buffer = [b""]
+        buffer = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunk = chunk.split(b"\n")