diff --git a/lib/cloudscraper/__init__.py b/lib/cloudscraper/__init__.py index eb957a63..d10d2f73 100644 --- a/lib/cloudscraper/__init__.py +++ b/lib/cloudscraper/__init__.py @@ -1,8 +1,26 @@ +# ------------------------------------------------------------------------------- # + import logging import re +import requests import sys import ssl -import requests + +from collections import OrderedDict +from copy import deepcopy + +from requests.adapters import HTTPAdapter +from requests.sessions import Session +from requests_toolbelt.utils import dump + +from time import sleep + +# ------------------------------------------------------------------------------- # + +try: + import brotli +except ImportError: + pass try: import copyreg @@ -17,12 +35,12 @@ except ImportError: else: from html.parser import HTMLParser -from copy import deepcopy -from time import sleep -from collections import OrderedDict +try: + from urlparse import urlparse, urljoin +except ImportError: + from urllib.parse import urlparse, urljoin -from requests.sessions import Session -from requests.adapters import HTTPAdapter +# ------------------------------------------------------------------------------- # from .exceptions import ( CloudflareLoopProtection, @@ -37,25 +55,9 @@ from .interpreters import JavaScriptInterpreter from .reCaptcha import reCaptcha from .user_agent import User_Agent -try: - from requests_toolbelt.utils import dump -except ImportError: - pass - -try: - import brotli -except ImportError: - pass - -try: - from urlparse import urlparse, urljoin -except ImportError: - from urllib.parse import urlparse, urljoin - - # ------------------------------------------------------------------------------- # -__version__ = '1.2.36' +__version__ = '1.2.40' # ------------------------------------------------------------------------------- # @@ -107,6 +109,9 @@ class CloudScraper(Session): self.ssl_context = kwargs.pop('ssl_context', None) self.interpreter = kwargs.pop('interpreter', 'native') self.recaptcha = kwargs.pop('recaptcha', {}) + self.requestPreHook = kwargs.pop('requestPreHook', None) + self.requestPostHook = kwargs.pop('requestPostHook', None) + self.allow_brotli = kwargs.pop( 'allow_brotli', True if 'brotli' in sys.modules.keys() else False @@ -213,19 +218,46 @@ class CloudScraper(Session): if kwargs.get('proxies') and kwargs.get('proxies') != self.proxies: self.proxies = kwargs.get('proxies') - resp = self.decodeBrotli( + # ------------------------------------------------------------------------------- # + # Pre-Hook the request via user defined function. + # ------------------------------------------------------------------------------- # + + if self.requestPreHook: + (method, url, args, kwargs) = self.requestPreHook( + self, + method, + url, + *args, + **kwargs + ) + + # ------------------------------------------------------------------------------- # + # Make the request via requests. + # ------------------------------------------------------------------------------- # + + response = self.decodeBrotli( super(CloudScraper, self).request(method, url, *args, **kwargs) ) # ------------------------------------------------------------------------------- # - # Debug request + # Debug the request via the Response object. # ------------------------------------------------------------------------------- # if self.debug: - self.debugRequest(resp) + self.debugRequest(response) + + # ------------------------------------------------------------------------------- # + # Post-Hook the request aka Post-Hook the response via user defined function. + # ------------------------------------------------------------------------------- # + + if self.requestPostHook: + response = self.requestPostHook(self, response) + + if self.debug: + self.debugRequest(response) # Check if Cloudflare anti-bot is on - if self.is_Challenge_Request(resp): + if self.is_Challenge_Request(response): # ------------------------------------------------------------------------------- # # Try to solve the challenge and send it back # ------------------------------------------------------------------------------- # @@ -239,12 +271,12 @@ class CloudScraper(Session): self._solveDepthCnt += 1 - resp = self.Challenge_Response(resp, **kwargs) + response = self.Challenge_Response(response, **kwargs) else: - if not resp.is_redirect and resp.status_code not in [429, 503]: + if not response.is_redirect and response.status_code not in [429, 503]: self._solveDepthCnt = 0 - return resp + return response # ------------------------------------------------------------------------------- # # check if the response contains a valid Cloudflare challenge @@ -259,7 +291,7 @@ class CloudScraper(Session): and re.search( r'