From 55c6ac7c8f747d9433f5522d2a3b142e960d938e Mon Sep 17 00:00:00 2001 From: Alhaziel01 Date: Sun, 5 Apr 2020 10:45:28 +0200 Subject: [PATCH] Aggiornato cloudscraper --- lib/cloudscraper/__init__.py | 135 +++++++++++++------ lib/cloudscraper/reCaptcha/2captcha.py | 44 +++--- lib/cloudscraper/reCaptcha/9kw.py | 16 ++- lib/cloudscraper/reCaptcha/__init__.py | 6 +- lib/cloudscraper/reCaptcha/anticaptcha.py | 36 +++-- lib/cloudscraper/reCaptcha/deathbycaptcha.py | 16 ++- lib/cloudscraper/user_agent/browsers.json | 6 +- 7 files changed, 178 insertions(+), 81 deletions(-) diff --git a/lib/cloudscraper/__init__.py b/lib/cloudscraper/__init__.py index a899b145..31a4cd36 100644 --- a/lib/cloudscraper/__init__.py +++ b/lib/cloudscraper/__init__.py @@ -54,7 +54,7 @@ except ImportError: # ------------------------------------------------------------------------------- # -__version__ = '1.2.30' +__version__ = '1.2.32' # ------------------------------------------------------------------------------- # @@ -148,6 +148,15 @@ class CloudScraper(Session): def __getstate__(self): return self.__dict__ + # ------------------------------------------------------------------------------- # + # Raise an Exception with no stacktrace and reset depth counter. + # ------------------------------------------------------------------------------- # + + def simpleException(self, exception, msg): + self._solveDepthCnt = 0 + sys.tracebacklimit = 0 + raise exception(msg) + # ------------------------------------------------------------------------------- # # debug the request via the response # ------------------------------------------------------------------------------- # @@ -219,9 +228,8 @@ class CloudScraper(Session): if self._solveDepthCnt >= self.solveDepth: _ = self._solveDepthCnt - self._solveDepthCnt = 0 - sys.tracebacklimit = 0 - raise CloudflareLoopProtection( + self.simpleException( + CloudflareLoopProtection, "!!Loop Protection!! We have tried to solve {} time(s) in a row.".format(_) ) @@ -303,8 +311,10 @@ class CloudScraper(Session): def is_Challenge_Request(self, resp): if self.is_Firewall_Blocked(resp): - sys.tracebacklimit = 0 - raise CloudflareCode1020('Cloudflare has blocked this request (Code 1020 Detected).') + self.simpleException( + CloudflareCode1020, + 'Cloudflare has blocked this request (Code 1020 Detected).' + ) if self.is_reCaptcha_Challenge(resp) or self.is_IUAM_Challenge(resp): return True @@ -317,16 +327,29 @@ class CloudScraper(Session): def IUAM_Challenge_Response(self, body, url, interpreter): try: - challengeUUID = re.search( - r'id="challenge-form" action="(?P\S+)"', - body, re.M | re.DOTALL - ).groupdict().get('challengeUUID', '') + formPayload = re.search( + r'
id="challenge-form" action="(?P.*?' + r'__cf_chl_jschl_tk__=\S+)"(.*?))', + body, + re.M | re.DOTALL + ).groupdict() - payload = OrderedDict(re.findall(r'name="(r|jschl_vc|pass)"\svalue="(.*?)"', body)) + if not all(key in formPayload for key in ['form', 'challengeUUID']): + self.simpleException( + CloudflareIUAMError, + "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly." + ) + + payload = OrderedDict( + re.findall( + r'name="(r|jschl_vc|pass)"\svalue="(.*?)"', + formPayload['form'] + ) + ) except AttributeError: - sys.tracebacklimit = 0 - raise CloudflareIUAMError( + self.simpleException( + CloudflareIUAMError, "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly." ) @@ -337,8 +360,8 @@ class CloudScraper(Session): interpreter ).solveChallenge(body, hostParsed.netloc) except Exception as e: - sys.tracebacklimit = 0 - raise CloudflareIUAMError( + self.simpleException( + CloudflareIUAMError, 'Unable to parse Cloudflare anti-bots page: {}'.format( getattr(e, 'message', e) ) @@ -348,7 +371,7 @@ class CloudScraper(Session): 'url': '{}://{}{}'.format( hostParsed.scheme, hostParsed.netloc, - self.unescape(challengeUUID) + self.unescape(formPayload['challengeUUID']) ), 'data': payload } @@ -359,34 +382,62 @@ class CloudScraper(Session): def reCaptcha_Challenge_Response(self, provider, provider_params, body, url): try: - payload = re.search( - r'(name="r"\svalue="(?P\S+)"|).*?challenge-form" action="(?P\S+)".*?' - r'data-ray="(?P\S+)".*?data-sitekey="(?P\S+)"', - body, re.M | re.DOTALL + formPayload = re.search( + r'
id="challenge-form" ' + r'action="(?P.*?__cf_chl_captcha_tk__=\S+)"(.*?))', + body, + re.M | re.DOTALL ).groupdict() - except (AttributeError): - sys.tracebacklimit = 0 - raise CloudflareReCaptchaError( + + if not all(key in formPayload for key in ['form', 'challengeUUID']): + self.simpleException( + CloudflareReCaptchaError, + "Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly." + ) + + payload = OrderedDict( + re.findall( + r'(name="r"\svalue|data-ray|data-sitekey|name="cf_captcha_kind"\svalue)="(.*?)"', + formPayload['form'] + ) + ) + + captchaType = 'reCaptcha' if payload['name="cf_captcha_kind" value'] == 're' else 'hCaptcha' + + except (AttributeError, KeyError): + self.simpleException( + CloudflareReCaptchaError, "Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly." ) + captchaResponse = reCaptcha.dynamicImport( + provider.lower() + ).solveCaptcha( + captchaType, + url, + payload['data-sitekey'], + provider_params + ) + + dataPayload = OrderedDict([ + ('r', payload.get('name="r" value', '')), + ('cf_captcha_kind', payload['name="cf_captcha_kind" value']), + ('id', payload.get('data-ray')), + ('g-recaptcha-response', captchaResponse) + ]) + + if captchaType == 'hCaptcha': + dataPayload.update({'h-captcha-response': captchaResponse}) + hostParsed = urlparse(url) + return { 'url': '{}://{}{}'.format( hostParsed.scheme, hostParsed.netloc, - self.unescape(payload.get('challengeUUID', '')) + self.unescape(formPayload['challengeUUID']) ), - 'data': OrderedDict([ - ('r', payload.get('r', '')), - ('id', payload.get('data_ray')), - ( - 'g-recaptcha-response', - reCaptcha.dynamicImport( - provider.lower() - ).solveCaptcha(url, payload.get('site_key'), provider_params) - ) - ]) + 'data': dataPayload } # ------------------------------------------------------------------------------- # @@ -412,8 +463,8 @@ class CloudScraper(Session): # ------------------------------------------------------------------------------- # if not self.recaptcha or not isinstance(self.recaptcha, dict) or not self.recaptcha.get('provider'): - sys.tracebacklimit = 0 - raise CloudflareReCaptchaProvider( + self.simpleException( + CloudflareReCaptchaProvider, "Cloudflare reCaptcha detected, unfortunately you haven't loaded an anti reCaptcha provider " "correctly via the 'recaptcha' parameter." ) @@ -448,8 +499,10 @@ class CloudScraper(Session): if isinstance(delay, (int, float)): self.delay = delay except (AttributeError, ValueError): - sys.tracebacklimit = 0 - raise CloudflareIUAMError("Cloudflare IUAM possibility malformed, issue extracing delay value.") + self.simpleException( + CloudflareIUAMError, + "Cloudflare IUAM possibility malformed, issue extracing delay value." + ) sleep(self.delay) @@ -507,6 +560,7 @@ class CloudScraper(Session): if not challengeSubmitResponse.is_redirect: return challengeSubmitResponse + else: cloudflare_kwargs = deepcopy(kwargs) cloudflare_kwargs['headers'] = updateAttr( @@ -535,6 +589,7 @@ class CloudScraper(Session): # ------------------------------------------------------------------------------- # return self.request(resp.request.method, resp.url, **kwargs) + # ------------------------------------------------------------------------------- # @classmethod @@ -587,8 +642,8 @@ class CloudScraper(Session): cookie_domain = d break else: - sys.tracebacklimit = 0 - raise CloudflareIUAMError( + cls.simpleException( + CloudflareIUAMError, "Unable to find Cloudflare cookies. Does the site actually " "have Cloudflare IUAM (I'm Under Attack Mode) enabled?" ) diff --git a/lib/cloudscraper/reCaptcha/2captcha.py b/lib/cloudscraper/reCaptcha/2captcha.py index b3a71fb9..e4789237 100644 --- a/lib/cloudscraper/reCaptcha/2captcha.py +++ b/lib/cloudscraper/reCaptcha/2captcha.py @@ -2,7 +2,6 @@ from __future__ import absolute_import import requests - from ..exceptions import ( reCaptchaServiceUnavailable, reCaptchaAPIError, @@ -81,7 +80,7 @@ class captchaSolver(reCaptcha): } } - if response.json().get('status') is False and response.json().get('request') in errors.get(request_type): + if response.json().get('status') == 0 and response.json().get('request') in errors.get(request_type): raise reCaptchaAPIError( '{} {}'.format( response.json().get('request'), @@ -113,7 +112,8 @@ class captchaSolver(reCaptcha): 'action': 'reportbad', 'id': jobID, 'json': '1' - } + }, + timeout=30 ), check_success=_checkRequest, step=5, @@ -149,7 +149,8 @@ class captchaSolver(reCaptcha): 'action': 'get', 'id': jobID, 'json': '1' - } + }, + timeout=30 ), check_success=_checkRequest, step=5, @@ -165,7 +166,7 @@ class captchaSolver(reCaptcha): # ------------------------------------------------------------------------------- # - def requestSolve(self, site_url, site_key): + def requestSolve(self, captchaType, url, siteKey): def _checkRequest(response): if response.ok and response.json().get("status") == 1 and response.json().get('request'): return response @@ -174,18 +175,29 @@ class captchaSolver(reCaptcha): return None + data = { + 'key': self.api_key, + 'pageurl': url, + 'json': 1, + 'soft_id': 5507698 + } + + data.update( + { + 'method': 'userrcaptcha', + 'googlekey': siteKey + } if captchaType == 'reCaptcha' else { + 'method': 'hcaptcha', + 'sitekey': siteKey + } + ) + response = polling.poll( lambda: self.session.post( '{}/in.php'.format(self.host), - data={ - 'key': self.api_key, - 'method': 'userrecaptcha', - 'googlekey': site_key, - 'pageurl': site_url, - 'json': '1', - 'soft_id': '5507698' - }, - allow_redirects=False + data=data, + allow_redirects=False, + timeout=30 ), check_success=_checkRequest, step=5, @@ -201,7 +213,7 @@ class captchaSolver(reCaptcha): # ------------------------------------------------------------------------------- # - def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams): + def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams): jobID = None if not reCaptchaParams.get('api_key'): @@ -215,7 +227,7 @@ class captchaSolver(reCaptcha): self.session.proxies = reCaptchaParams.get('proxies') try: - jobID = self.requestSolve(site_url, site_key) + jobID = self.requestSolve(captchaType, url, siteKey) return self.requestJob(jobID) except polling.TimeoutException: try: diff --git a/lib/cloudscraper/reCaptcha/9kw.py b/lib/cloudscraper/reCaptcha/9kw.py index 212b44d8..2404bfe5 100644 --- a/lib/cloudscraper/reCaptcha/9kw.py +++ b/lib/cloudscraper/reCaptcha/9kw.py @@ -12,6 +12,7 @@ except ImportError: ) from ..exceptions import ( + reCaptchaException, reCaptchaServiceUnavailable, reCaptchaAPIError, reCaptchaTimeout, @@ -143,7 +144,7 @@ class captchaSolver(reCaptcha): # ------------------------------------------------------------------------------- # - def requestSolve(self, site_url, site_key): + def requestSolve(self, url, siteKey): def _checkRequest(response): if response.ok and response.text.startswith('{') and response.json().get('captchaid'): return response @@ -159,9 +160,9 @@ class captchaSolver(reCaptcha): 'apikey': self.api_key, 'action': 'usercaptchaupload', 'interactive': 1, - 'file-upload-01': site_key, + 'file-upload-01': siteKey, 'oldsource': 'recaptchav2', - 'pageurl': site_url, + 'pageurl': url, 'maxtimeout': self.maxtimeout, 'json': 1 }, @@ -179,12 +180,17 @@ class captchaSolver(reCaptcha): # ------------------------------------------------------------------------------- # - def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams): + def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams): jobID = None if not reCaptchaParams.get('api_key'): raise reCaptchaParameter("9kw: Missing api_key parameter.") + if captchaType == 'hCaptcha': + raise reCaptchaException( + 'Provider does not support hCaptcha.' + ) + self.api_key = reCaptchaParams.get('api_key') if reCaptchaParams.get('maxtimeout'): @@ -194,7 +200,7 @@ class captchaSolver(reCaptcha): self.session.proxies = reCaptchaParams.get('proxies') try: - jobID = self.requestSolve(site_url, site_key) + jobID = self.requestSolve(url, siteKey) return self.requestJob(jobID) except polling.TimeoutException: raise reCaptchaTimeout( diff --git a/lib/cloudscraper/reCaptcha/__init__.py b/lib/cloudscraper/reCaptcha/__init__.py index dee27fcf..f23d2601 100644 --- a/lib/cloudscraper/reCaptcha/__init__.py +++ b/lib/cloudscraper/reCaptcha/__init__.py @@ -37,10 +37,10 @@ class reCaptcha(ABC): # ------------------------------------------------------------------------------- # @abc.abstractmethod - def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams): + def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams): pass # ------------------------------------------------------------------------------- # - def solveCaptcha(self, site_url, site_key, reCaptchaParams): - return self.getCaptchaAnswer(site_url, site_key, reCaptchaParams) + def solveCaptcha(self, captchaType, url, siteKey, reCaptchaParams): + return self.getCaptchaAnswer(captchaType, url, siteKey, reCaptchaParams) diff --git a/lib/cloudscraper/reCaptcha/anticaptcha.py b/lib/cloudscraper/reCaptcha/anticaptcha.py index c6cae275..3c45abe0 100644 --- a/lib/cloudscraper/reCaptcha/anticaptcha.py +++ b/lib/cloudscraper/reCaptcha/anticaptcha.py @@ -1,16 +1,22 @@ from __future__ import absolute_import -from ..exceptions import reCaptchaParameter +from ..exceptions import ( + reCaptchaParameter, + reCaptchaTimeout, + reCaptchaAPIError +) try: from python_anticaptcha import ( AnticaptchaClient, - NoCaptchaTaskProxylessTask + NoCaptchaTaskProxylessTask, + HCaptchaTaskProxyless, + AnticaptchaException ) except ImportError: raise ImportError( - "Please install the python module 'python_anticaptcha' via pip or download it from " - "https://github.com/ad-m/python-anticaptcha" + "Please install/upgrade the python module 'python_anticaptcha' via " + "pip install python-anticaptcha or https://github.com/ad-m/python-anticaptcha/" ) from . import reCaptcha @@ -23,7 +29,7 @@ class captchaSolver(reCaptcha): # ------------------------------------------------------------------------------- # - def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams): + def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams): if not reCaptchaParams.get('api_key'): raise reCaptchaParameter("anticaptcha: Missing api_key parameter.") @@ -32,16 +38,30 @@ class captchaSolver(reCaptcha): if reCaptchaParams.get('proxy'): client.session.proxies = reCaptchaParams.get('proxies') - task = NoCaptchaTaskProxylessTask(site_url, site_key) + captchaMap = { + 'reCaptcha': NoCaptchaTaskProxylessTask, + 'hCaptcha': HCaptchaTaskProxyless + } + + task = captchaMap[captchaType](url, siteKey) if not hasattr(client, 'createTaskSmee'): raise NotImplementedError( "Please upgrade 'python_anticaptcha' via pip or download it from " - "https://github.com/ad-m/python-anticaptcha" + "https://github.com/ad-m/python-anticaptcha/tree/hcaptcha" ) job = client.createTaskSmee(task) - return job.get_solution_response() + + try: + job.join(maximum_time=180) + except (AnticaptchaException) as e: + raise reCaptchaTimeout('{}'.format(getattr(e, 'message', e))) + + if 'solution' in job._last_result: + return job.get_solution_response() + else: + raise reCaptchaAPIError('Job did not return `solution` key in payload.') # ------------------------------------------------------------------------------- # diff --git a/lib/cloudscraper/reCaptcha/deathbycaptcha.py b/lib/cloudscraper/reCaptcha/deathbycaptcha.py index 7aeda916..6079c1d4 100644 --- a/lib/cloudscraper/reCaptcha/deathbycaptcha.py +++ b/lib/cloudscraper/reCaptcha/deathbycaptcha.py @@ -12,6 +12,7 @@ except ImportError: ) from ..exceptions import ( + reCaptchaException, reCaptchaServiceUnavailable, reCaptchaAccountError, reCaptchaTimeout, @@ -154,7 +155,7 @@ class captchaSolver(reCaptcha): # ------------------------------------------------------------------------------- # - def requestSolve(self, site_url, site_key): + def requestSolve(self, url, siteKey): def _checkRequest(response): if response.ok and response.json().get("is_correct") and response.json().get('captcha'): return response @@ -172,8 +173,8 @@ class captchaSolver(reCaptcha): 'password': self.password, 'type': '4', 'token_params': json.dumps({ - 'googlekey': site_key, - 'pageurl': site_url + 'googlekey': siteKey, + 'pageurl': url }) }, allow_redirects=False @@ -192,7 +193,7 @@ class captchaSolver(reCaptcha): # ------------------------------------------------------------------------------- # - def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams): + def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams): jobID = None for param in ['username', 'password']: @@ -202,11 +203,16 @@ class captchaSolver(reCaptcha): ) setattr(self, param, reCaptchaParams.get(param)) + if captchaType == 'hCaptcha': + raise reCaptchaException( + 'Provider does not support hCaptcha.' + ) + if reCaptchaParams.get('proxy'): self.session.proxies = reCaptchaParams.get('proxies') try: - jobID = self.requestSolve(site_url, site_key) + jobID = self.requestSolve(url, siteKey) return self.requestJob(jobID) except polling.TimeoutException: try: diff --git a/lib/cloudscraper/user_agent/browsers.json b/lib/cloudscraper/user_agent/browsers.json index a808788d..54a69541 100644 --- a/lib/cloudscraper/user_agent/browsers.json +++ b/lib/cloudscraper/user_agent/browsers.json @@ -19,8 +19,7 @@ "ECDHE-RSA-CHACHA20-POLY1305", "AES128-GCM-SHA256", "AES256-GCM-SHA384", - "AES128-SHA", - "AES256-SHA" + "AES128-SHA" ], "releases": { "Chrome/50.0.0.0": { @@ -12825,8 +12824,7 @@ "ECDHE-ECDSA-AES128-SHA", "DHE-RSA-AES128-SHA", "DHE-RSA-AES256-SHA", - "AES128-SHA", - "AES256-SHA" + "AES128-SHA" ], "releases": { "Firefox/50.0": {