From 55c6ac7c8f747d9433f5522d2a3b142e960d938e Mon Sep 17 00:00:00 2001
From: Alhaziel01 <alhaziel01@gmail.com>
Date: Sun, 5 Apr 2020 10:45:28 +0200
Subject: [PATCH] Aggiornato cloudscraper

---
 lib/cloudscraper/__init__.py                 | 135 +++++++++++++------
 lib/cloudscraper/reCaptcha/2captcha.py       |  44 +++---
 lib/cloudscraper/reCaptcha/9kw.py            |  16 ++-
 lib/cloudscraper/reCaptcha/__init__.py       |   6 +-
 lib/cloudscraper/reCaptcha/anticaptcha.py    |  36 +++--
 lib/cloudscraper/reCaptcha/deathbycaptcha.py |  16 ++-
 lib/cloudscraper/user_agent/browsers.json    |   6 +-
 7 files changed, 178 insertions(+), 81 deletions(-)
diff --git a/lib/cloudscraper/__init__.py b/lib/cloudscraper/__init__.py
index a899b145..31a4cd36 100644
--- a/lib/cloudscraper/__init__.py
+++ b/lib/cloudscraper/__init__.py
@@ -54,7 +54,7 @@ except ImportError:
 
 # ------------------------------------------------------------------------------- #
 
-__version__ = '1.2.30'
+__version__ = '1.2.32'
 
 # ------------------------------------------------------------------------------- #
 
@@ -148,6 +148,15 @@ class CloudScraper(Session):
     def __getstate__(self):
         return self.__dict__
 
+    # ------------------------------------------------------------------------------- #
+    # Raise an Exception with no stacktrace and reset depth counter.
+    # ------------------------------------------------------------------------------- #
+
+    def simpleException(self, exception, msg):
+        self._solveDepthCnt = 0
+        sys.tracebacklimit = 0
+        raise exception(msg)
+
     # ------------------------------------------------------------------------------- #
     # debug the request via the response
     # ------------------------------------------------------------------------------- #
@@ -219,9 +228,8 @@ class CloudScraper(Session):
 
             if self._solveDepthCnt >= self.solveDepth:
                 _ = self._solveDepthCnt
-                self._solveDepthCnt = 0
-                sys.tracebacklimit = 0
-                raise CloudflareLoopProtection(
+                self.simpleException(
+                    CloudflareLoopProtection,
                     "!!Loop Protection!! We have tried to solve {} time(s) in a row.".format(_)
                 )
 
@@ -303,8 +311,10 @@ class CloudScraper(Session):
 
     def is_Challenge_Request(self, resp):
         if self.is_Firewall_Blocked(resp):
-            sys.tracebacklimit = 0
-            raise CloudflareCode1020('Cloudflare has blocked this request (Code 1020 Detected).')
+            self.simpleException(
+                CloudflareCode1020,
+                'Cloudflare has blocked this request (Code 1020 Detected).'
+            )
 
         if self.is_reCaptcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
             return True
@@ -317,16 +327,29 @@ class CloudScraper(Session):
 
     def IUAM_Challenge_Response(self, body, url, interpreter):
         try:
-            challengeUUID = re.search(
-                r'id="challenge-form" action="(?P<challengeUUID>\S+)"',
-                body, re.M | re.DOTALL
-            ).groupdict().get('challengeUUID', '')
+            formPayload = re.search(
+                r'<form (?P<form>id="challenge-form" action="(?P<challengeUUID>.*?'
+                r'__cf_chl_jschl_tk__=\S+)"(.*?)</form>)',
+                body,
+                re.M | re.DOTALL
+            ).groupdict()
 
-            payload = OrderedDict(re.findall(r'name="(r|jschl_vc|pass)"\svalue="(.*?)"', body))
+            if not all(key in formPayload for key in ['form', 'challengeUUID']):
+                self.simpleException(
+                    CloudflareIUAMError,
+                    "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
+                )
+
+            payload = OrderedDict(
+                re.findall(
+                    r'name="(r|jschl_vc|pass)"\svalue="(.*?)"',
+                    formPayload['form']
+                )
+            )
 
         except AttributeError:
-            sys.tracebacklimit = 0
-            raise CloudflareIUAMError(
+            self.simpleException(
+                CloudflareIUAMError,
                 "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
             )
 
@@ -337,8 +360,8 @@ class CloudScraper(Session):
                 interpreter
             ).solveChallenge(body, hostParsed.netloc)
         except Exception as e:
-            sys.tracebacklimit = 0
-            raise CloudflareIUAMError(
+            self.simpleException(
+                CloudflareIUAMError,
                 'Unable to parse Cloudflare anti-bots page: {}'.format(
                     getattr(e, 'message', e)
                 )
@@ -348,7 +371,7 @@ class CloudScraper(Session):
             'url': '{}://{}{}'.format(
                 hostParsed.scheme,
                 hostParsed.netloc,
-                self.unescape(challengeUUID)
+                self.unescape(formPayload['challengeUUID'])
             ),
             'data': payload
         }
@@ -359,34 +382,62 @@ class CloudScraper(Session):
 
     def reCaptcha_Challenge_Response(self, provider, provider_params, body, url):
         try:
-            payload = re.search(
-                r'(name="r"\svalue="(?P<r>\S+)"|).*?challenge-form" action="(?P<challengeUUID>\S+)".*?'
-                r'data-ray="(?P<data_ray>\S+)".*?data-sitekey="(?P<site_key>\S+)"',
-                body, re.M | re.DOTALL
+            formPayload = re.search(
+                r'<form class="challenge-form" (?P<form>id="challenge-form" '
+                r'action="(?P<challengeUUID>.*?__cf_chl_captcha_tk__=\S+)"(.*?)</form>)',
+                body,
+                re.M | re.DOTALL
             ).groupdict()
-        except (AttributeError):
-            sys.tracebacklimit = 0
-            raise CloudflareReCaptchaError(
+
+            if not all(key in formPayload for key in ['form', 'challengeUUID']):
+                self.simpleException(
+                    CloudflareReCaptchaError,
+                    "Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
+                )
+
+            payload = OrderedDict(
+                re.findall(
+                    r'(name="r"\svalue|data-ray|data-sitekey|name="cf_captcha_kind"\svalue)="(.*?)"',
+                    formPayload['form']
+                )
+            )
+
+            captchaType = 'reCaptcha' if payload['name="cf_captcha_kind" value'] == 're' else 'hCaptcha'
+
+        except (AttributeError, KeyError):
+            self.simpleException(
+                CloudflareReCaptchaError,
                 "Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
             )
 
+        captchaResponse = reCaptcha.dynamicImport(
+            provider.lower()
+        ).solveCaptcha(
+            captchaType,
+            url,
+            payload['data-sitekey'],
+            provider_params
+        )
+
+        dataPayload = OrderedDict([
+            ('r', payload.get('name="r" value', '')),
+            ('cf_captcha_kind', payload['name="cf_captcha_kind" value']),
+            ('id', payload.get('data-ray')),
+            ('g-recaptcha-response', captchaResponse)
+        ])
+
+        if captchaType == 'hCaptcha':
+            dataPayload.update({'h-captcha-response': captchaResponse})
+
         hostParsed = urlparse(url)
+
         return {
             'url': '{}://{}{}'.format(
                 hostParsed.scheme,
                 hostParsed.netloc,
-                self.unescape(payload.get('challengeUUID', ''))
+                self.unescape(formPayload['challengeUUID'])
             ),
-            'data': OrderedDict([
-                ('r', payload.get('r', '')),
-                ('id', payload.get('data_ray')),
-                (
-                    'g-recaptcha-response',
-                    reCaptcha.dynamicImport(
-                        provider.lower()
-                    ).solveCaptcha(url, payload.get('site_key'), provider_params)
-                )
-            ])
+            'data': dataPayload
         }
 
     # ------------------------------------------------------------------------------- #
@@ -412,8 +463,8 @@ class CloudScraper(Session):
             # ------------------------------------------------------------------------------- #
 
             if not self.recaptcha or not isinstance(self.recaptcha, dict) or not self.recaptcha.get('provider'):
-                sys.tracebacklimit = 0
-                raise CloudflareReCaptchaProvider(
+                self.simpleException(
+                    CloudflareReCaptchaProvider,
                     "Cloudflare reCaptcha detected, unfortunately you haven't loaded an anti reCaptcha provider "
                     "correctly via the 'recaptcha' parameter."
                 )
@@ -448,8 +499,10 @@ class CloudScraper(Session):
                     if isinstance(delay, (int, float)):
                         self.delay = delay
                 except (AttributeError, ValueError):
-                    sys.tracebacklimit = 0
-                    raise CloudflareIUAMError("Cloudflare IUAM possibility malformed, issue extracing delay value.")
+                    self.simpleException(
+                        CloudflareIUAMError,
+                        "Cloudflare IUAM possibility malformed, issue extracing delay value."
+                    )
 
             sleep(self.delay)
 
@@ -507,6 +560,7 @@ class CloudScraper(Session):
 
             if not challengeSubmitResponse.is_redirect:
                 return challengeSubmitResponse
+
             else:
                 cloudflare_kwargs = deepcopy(kwargs)
                 cloudflare_kwargs['headers'] = updateAttr(
@@ -535,6 +589,7 @@ class CloudScraper(Session):
         # ------------------------------------------------------------------------------- #
 
         return self.request(resp.request.method, resp.url, **kwargs)
+
     # ------------------------------------------------------------------------------- #
 
     @classmethod
@@ -587,8 +642,8 @@ class CloudScraper(Session):
                 cookie_domain = d
                 break
         else:
-            sys.tracebacklimit = 0
-            raise CloudflareIUAMError(
+            cls.simpleException(
+                cls, CloudflareIUAMError,  # instance method invoked on the class: pass cls as self
                 "Unable to find Cloudflare cookies. Does the site actually "
                 "have Cloudflare IUAM (I'm Under Attack Mode) enabled?"
             )
diff --git a/lib/cloudscraper/reCaptcha/2captcha.py b/lib/cloudscraper/reCaptcha/2captcha.py
index b3a71fb9..e4789237 100644
--- a/lib/cloudscraper/reCaptcha/2captcha.py
+++ b/lib/cloudscraper/reCaptcha/2captcha.py
@@ -2,7 +2,6 @@ from __future__ import absolute_import
 
 import requests
 
-
 from ..exceptions import (
     reCaptchaServiceUnavailable,
     reCaptchaAPIError,
@@ -81,7 +80,7 @@ class captchaSolver(reCaptcha):
             }
         }
 
-        if response.json().get('status') is False and response.json().get('request') in errors.get(request_type):
+        if response.json().get('status') == 0 and response.json().get('request') in errors.get(request_type):
             raise reCaptchaAPIError(
                 '{} {}'.format(
                     response.json().get('request'),
@@ -113,7 +112,8 @@ class captchaSolver(reCaptcha):
                     'action': 'reportbad',
                     'id': jobID,
                     'json': '1'
-                }
+                },
+                timeout=30
             ),
             check_success=_checkRequest,
             step=5,
@@ -149,7 +149,8 @@ class captchaSolver(reCaptcha):
                     'action': 'get',
                     'id': jobID,
                     'json': '1'
-                }
+                },
+                timeout=30
             ),
             check_success=_checkRequest,
             step=5,
@@ -165,7 +166,7 @@ class captchaSolver(reCaptcha):
 
     # ------------------------------------------------------------------------------- #
 
-    def requestSolve(self, site_url, site_key):
+    def requestSolve(self, captchaType, url, siteKey):
         def _checkRequest(response):
             if response.ok and response.json().get("status") == 1 and response.json().get('request'):
                 return response
@@ -174,18 +175,29 @@ class captchaSolver(reCaptcha):
 
             return None
 
+        data = {
+            'key': self.api_key,
+            'pageurl': url,
+            'json': 1,
+            'soft_id': 5507698
+        }
+
+        data.update(
+            {
+                'method': 'userrecaptcha',
+                'googlekey': siteKey
+            } if captchaType == 'reCaptcha' else {
+                'method': 'hcaptcha',
+                'sitekey': siteKey
+            }
+        )
+
         response = polling.poll(
             lambda: self.session.post(
                 '{}/in.php'.format(self.host),
-                data={
-                    'key': self.api_key,
-                    'method': 'userrecaptcha',
-                    'googlekey': site_key,
-                    'pageurl': site_url,
-                    'json': '1',
-                    'soft_id': '5507698'
-                },
-                allow_redirects=False
+                data=data,
+                allow_redirects=False,
+                timeout=30
             ),
             check_success=_checkRequest,
             step=5,
@@ -201,7 +213,7 @@ class captchaSolver(reCaptcha):
 
     # ------------------------------------------------------------------------------- #
 
-    def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
+    def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
         jobID = None
 
         if not reCaptchaParams.get('api_key'):
@@ -215,7 +227,7 @@ class captchaSolver(reCaptcha):
             self.session.proxies = reCaptchaParams.get('proxies')
 
         try:
-            jobID = self.requestSolve(site_url, site_key)
+            jobID = self.requestSolve(captchaType, url, siteKey)
             return self.requestJob(jobID)
         except polling.TimeoutException:
             try:
diff --git a/lib/cloudscraper/reCaptcha/9kw.py b/lib/cloudscraper/reCaptcha/9kw.py
index 212b44d8..2404bfe5 100644
--- a/lib/cloudscraper/reCaptcha/9kw.py
+++ b/lib/cloudscraper/reCaptcha/9kw.py
@@ -12,6 +12,7 @@ except ImportError:
     )
 
 from ..exceptions import (
+    reCaptchaException,
     reCaptchaServiceUnavailable,
     reCaptchaAPIError,
     reCaptchaTimeout,
@@ -143,7 +144,7 @@ class captchaSolver(reCaptcha):
 
     # ------------------------------------------------------------------------------- #
 
-    def requestSolve(self, site_url, site_key):
+    def requestSolve(self, url, siteKey):
         def _checkRequest(response):
             if response.ok and response.text.startswith('{') and response.json().get('captchaid'):
                 return response
@@ -159,9 +160,9 @@ class captchaSolver(reCaptcha):
                     'apikey': self.api_key,
                     'action': 'usercaptchaupload',
                     'interactive': 1,
-                    'file-upload-01': site_key,
+                    'file-upload-01': siteKey,
                     'oldsource': 'recaptchav2',
-                    'pageurl': site_url,
+                    'pageurl': url,
                     'maxtimeout': self.maxtimeout,
                     'json': 1
                 },
@@ -179,12 +180,17 @@ class captchaSolver(reCaptcha):
 
     # ------------------------------------------------------------------------------- #
 
-    def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
+    def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
         jobID = None
 
         if not reCaptchaParams.get('api_key'):
             raise reCaptchaParameter("9kw: Missing api_key parameter.")
 
+        if captchaType == 'hCaptcha':
+            raise reCaptchaException(
+                'Provider does not support hCaptcha.'
+            )
+
         self.api_key = reCaptchaParams.get('api_key')
 
         if reCaptchaParams.get('maxtimeout'):
@@ -194,7 +200,7 @@ class captchaSolver(reCaptcha):
             self.session.proxies = reCaptchaParams.get('proxies')
 
         try:
-            jobID = self.requestSolve(site_url, site_key)
+            jobID = self.requestSolve(url, siteKey)
             return self.requestJob(jobID)
         except polling.TimeoutException:
             raise reCaptchaTimeout(
diff --git a/lib/cloudscraper/reCaptcha/__init__.py b/lib/cloudscraper/reCaptcha/__init__.py
index dee27fcf..f23d2601 100644
--- a/lib/cloudscraper/reCaptcha/__init__.py
+++ b/lib/cloudscraper/reCaptcha/__init__.py
@@ -37,10 +37,10 @@ class reCaptcha(ABC):
     # ------------------------------------------------------------------------------- #
 
     @abc.abstractmethod
-    def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
+    def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
         pass
 
     # ------------------------------------------------------------------------------- #
 
-    def solveCaptcha(self, site_url, site_key, reCaptchaParams):
-        return self.getCaptchaAnswer(site_url, site_key, reCaptchaParams)
+    def solveCaptcha(self, captchaType, url, siteKey, reCaptchaParams):
+        return self.getCaptchaAnswer(captchaType, url, siteKey, reCaptchaParams)
diff --git a/lib/cloudscraper/reCaptcha/anticaptcha.py b/lib/cloudscraper/reCaptcha/anticaptcha.py
index c6cae275..3c45abe0 100644
--- a/lib/cloudscraper/reCaptcha/anticaptcha.py
+++ b/lib/cloudscraper/reCaptcha/anticaptcha.py
@@ -1,16 +1,22 @@
 from __future__ import absolute_import
 
-from ..exceptions import reCaptchaParameter
+from ..exceptions import (
+    reCaptchaParameter,
+    reCaptchaTimeout,
+    reCaptchaAPIError
+)
 
 try:
     from python_anticaptcha import (
         AnticaptchaClient,
-        NoCaptchaTaskProxylessTask
+        NoCaptchaTaskProxylessTask,
+        HCaptchaTaskProxyless,
+        AnticaptchaException
     )
 except ImportError:
     raise ImportError(
-        "Please install the python module 'python_anticaptcha' via pip or download it from "
-        "https://github.com/ad-m/python-anticaptcha"
+        "Please install/upgrade the python module 'python_anticaptcha' via "
+        "pip install python-anticaptcha or https://github.com/ad-m/python-anticaptcha/"
     )
 
 from . import reCaptcha
@@ -23,7 +29,7 @@ class captchaSolver(reCaptcha):
 
     # ------------------------------------------------------------------------------- #
 
-    def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
+    def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
         if not reCaptchaParams.get('api_key'):
             raise reCaptchaParameter("anticaptcha: Missing api_key parameter.")
 
@@ -32,16 +38,30 @@ class captchaSolver(reCaptcha):
         if reCaptchaParams.get('proxy'):
             client.session.proxies = reCaptchaParams.get('proxies')
 
-        task = NoCaptchaTaskProxylessTask(site_url, site_key)
+        captchaMap = {
+            'reCaptcha': NoCaptchaTaskProxylessTask,
+            'hCaptcha': HCaptchaTaskProxyless
+        }
+
+        task = captchaMap[captchaType](url, siteKey)
 
         if not hasattr(client, 'createTaskSmee'):
             raise NotImplementedError(
                 "Please upgrade 'python_anticaptcha' via pip or download it from "
-                "https://github.com/ad-m/python-anticaptcha"
+                "https://github.com/ad-m/python-anticaptcha/tree/hcaptcha"
             )
 
         job = client.createTaskSmee(task)
-        return job.get_solution_response()
+
+        try:
+            job.join(maximum_time=180)
+        except (AnticaptchaException) as e:
+            raise reCaptchaTimeout('{}'.format(getattr(e, 'message', e)))
+
+        if 'solution' in job._last_result:
+            return job.get_solution_response()
+        else:
+            raise reCaptchaAPIError('Job did not return `solution` key in payload.')
 
 
 # ------------------------------------------------------------------------------- #
diff --git a/lib/cloudscraper/reCaptcha/deathbycaptcha.py b/lib/cloudscraper/reCaptcha/deathbycaptcha.py
index 7aeda916..6079c1d4 100644
--- a/lib/cloudscraper/reCaptcha/deathbycaptcha.py
+++ b/lib/cloudscraper/reCaptcha/deathbycaptcha.py
@@ -12,6 +12,7 @@ except ImportError:
     )
 
 from ..exceptions import (
+    reCaptchaException,
     reCaptchaServiceUnavailable,
     reCaptchaAccountError,
     reCaptchaTimeout,
@@ -154,7 +155,7 @@ class captchaSolver(reCaptcha):
 
     # ------------------------------------------------------------------------------- #
 
-    def requestSolve(self, site_url, site_key):
+    def requestSolve(self, url, siteKey):
         def _checkRequest(response):
             if response.ok and response.json().get("is_correct") and response.json().get('captcha'):
                 return response
@@ -172,8 +173,8 @@ class captchaSolver(reCaptcha):
                     'password': self.password,
                     'type': '4',
                     'token_params': json.dumps({
-                        'googlekey': site_key,
-                        'pageurl': site_url
+                        'googlekey': siteKey,
+                        'pageurl': url
                     })
                 },
                 allow_redirects=False
@@ -192,7 +193,7 @@ class captchaSolver(reCaptcha):
 
     # ------------------------------------------------------------------------------- #
 
-    def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
+    def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
         jobID = None
 
         for param in ['username', 'password']:
@@ -202,11 +203,16 @@ class captchaSolver(reCaptcha):
                 )
             setattr(self, param, reCaptchaParams.get(param))
 
+        if captchaType == 'hCaptcha':
+            raise reCaptchaException(
+                'Provider does not support hCaptcha.'
+            )
+
         if reCaptchaParams.get('proxy'):
             self.session.proxies = reCaptchaParams.get('proxies')
 
         try:
-            jobID = self.requestSolve(site_url, site_key)
+            jobID = self.requestSolve(url, siteKey)
             return self.requestJob(jobID)
         except polling.TimeoutException:
             try:
diff --git a/lib/cloudscraper/user_agent/browsers.json b/lib/cloudscraper/user_agent/browsers.json
index a808788d..54a69541 100644
--- a/lib/cloudscraper/user_agent/browsers.json
+++ b/lib/cloudscraper/user_agent/browsers.json
@@ -19,8 +19,7 @@
             "ECDHE-RSA-CHACHA20-POLY1305",
             "AES128-GCM-SHA256",
             "AES256-GCM-SHA384",
-            "AES128-SHA",
-            "AES256-SHA"
+            "AES128-SHA"
         ],
         "releases": {
             "Chrome/50.0.0.0": {
@@ -12825,8 +12824,7 @@
             "ECDHE-ECDSA-AES128-SHA",
             "DHE-RSA-AES128-SHA",
             "DHE-RSA-AES256-SHA",
-            "AES128-SHA",
-            "AES256-SHA"
+            "AES128-SHA"
         ],
         "releases": {
             "Firefox/50.0": {