Aggiornato cloudscraper

This commit is contained in:
Alhaziel01
2020-04-05 10:45:28 +02:00
parent 34be96127b
commit 55c6ac7c8f
7 changed files with 178 additions and 81 deletions

View File

@@ -54,7 +54,7 @@ except ImportError:
# ------------------------------------------------------------------------------- #
__version__ = '1.2.30'
__version__ = '1.2.32'
# ------------------------------------------------------------------------------- #
@@ -148,6 +148,15 @@ class CloudScraper(Session):
def __getstate__(self):
return self.__dict__
# ------------------------------------------------------------------------------- #
# Raise an Exception with no stacktrace and reset depth counter.
# ------------------------------------------------------------------------------- #
def simpleException(self, exception, msg):
self._solveDepthCnt = 0
sys.tracebacklimit = 0
raise exception(msg)
# ------------------------------------------------------------------------------- #
# debug the request via the response
# ------------------------------------------------------------------------------- #
@@ -219,9 +228,8 @@ class CloudScraper(Session):
if self._solveDepthCnt >= self.solveDepth:
_ = self._solveDepthCnt
self._solveDepthCnt = 0
sys.tracebacklimit = 0
raise CloudflareLoopProtection(
self.simpleException(
CloudflareLoopProtection,
"!!Loop Protection!! We have tried to solve {} time(s) in a row.".format(_)
)
@@ -303,8 +311,10 @@ class CloudScraper(Session):
def is_Challenge_Request(self, resp):
if self.is_Firewall_Blocked(resp):
sys.tracebacklimit = 0
raise CloudflareCode1020('Cloudflare has blocked this request (Code 1020 Detected).')
self.simpleException(
CloudflareCode1020,
'Cloudflare has blocked this request (Code 1020 Detected).'
)
if self.is_reCaptcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
return True
@@ -317,16 +327,29 @@ class CloudScraper(Session):
def IUAM_Challenge_Response(self, body, url, interpreter):
try:
challengeUUID = re.search(
r'id="challenge-form" action="(?P<challengeUUID>\S+)"',
body, re.M | re.DOTALL
).groupdict().get('challengeUUID', '')
formPayload = re.search(
r'<form (?P<form>id="challenge-form" action="(?P<challengeUUID>.*?'
r'__cf_chl_jschl_tk__=\S+)"(.*?)</form>)',
body,
re.M | re.DOTALL
).groupdict()
payload = OrderedDict(re.findall(r'name="(r|jschl_vc|pass)"\svalue="(.*?)"', body))
if not all(key in formPayload for key in ['form', 'challengeUUID']):
self.simpleException(
CloudflareIUAMError,
"Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
)
payload = OrderedDict(
re.findall(
r'name="(r|jschl_vc|pass)"\svalue="(.*?)"',
formPayload['form']
)
)
except AttributeError:
sys.tracebacklimit = 0
raise CloudflareIUAMError(
self.simpleException(
CloudflareIUAMError,
"Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
)
@@ -337,8 +360,8 @@ class CloudScraper(Session):
interpreter
).solveChallenge(body, hostParsed.netloc)
except Exception as e:
sys.tracebacklimit = 0
raise CloudflareIUAMError(
self.simpleException(
CloudflareIUAMError,
'Unable to parse Cloudflare anti-bots page: {}'.format(
getattr(e, 'message', e)
)
@@ -348,7 +371,7 @@ class CloudScraper(Session):
'url': '{}://{}{}'.format(
hostParsed.scheme,
hostParsed.netloc,
self.unescape(challengeUUID)
self.unescape(formPayload['challengeUUID'])
),
'data': payload
}
@@ -359,34 +382,62 @@ class CloudScraper(Session):
def reCaptcha_Challenge_Response(self, provider, provider_params, body, url):
try:
payload = re.search(
r'(name="r"\svalue="(?P<r>\S+)"|).*?challenge-form" action="(?P<challengeUUID>\S+)".*?'
r'data-ray="(?P<data_ray>\S+)".*?data-sitekey="(?P<site_key>\S+)"',
body, re.M | re.DOTALL
formPayload = re.search(
r'<form class="challenge-form" (?P<form>id="challenge-form" '
r'action="(?P<challengeUUID>.*?__cf_chl_captcha_tk__=\S+)"(.*?)</form>)',
body,
re.M | re.DOTALL
).groupdict()
except (AttributeError):
sys.tracebacklimit = 0
raise CloudflareReCaptchaError(
if not all(key in formPayload for key in ['form', 'challengeUUID']):
self.simpleException(
CloudflareReCaptchaError,
"Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
)
payload = OrderedDict(
re.findall(
r'(name="r"\svalue|data-ray|data-sitekey|name="cf_captcha_kind"\svalue)="(.*?)"',
formPayload['form']
)
)
captchaType = 'reCaptcha' if payload['name="cf_captcha_kind" value'] == 're' else 'hCaptcha'
except (AttributeError, KeyError):
self.simpleException(
CloudflareReCaptchaError,
"Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
)
captchaResponse = reCaptcha.dynamicImport(
provider.lower()
).solveCaptcha(
captchaType,
url,
payload['data-sitekey'],
provider_params
)
dataPayload = OrderedDict([
('r', payload.get('name="r" value', '')),
('cf_captcha_kind', payload['name="cf_captcha_kind" value']),
('id', payload.get('data-ray')),
('g-recaptcha-response', captchaResponse)
])
if captchaType == 'hCaptcha':
dataPayload.update({'h-captcha-response': captchaResponse})
hostParsed = urlparse(url)
return {
'url': '{}://{}{}'.format(
hostParsed.scheme,
hostParsed.netloc,
self.unescape(payload.get('challengeUUID', ''))
self.unescape(formPayload['challengeUUID'])
),
'data': OrderedDict([
('r', payload.get('r', '')),
('id', payload.get('data_ray')),
(
'g-recaptcha-response',
reCaptcha.dynamicImport(
provider.lower()
).solveCaptcha(url, payload.get('site_key'), provider_params)
)
])
'data': dataPayload
}
# ------------------------------------------------------------------------------- #
@@ -412,8 +463,8 @@ class CloudScraper(Session):
# ------------------------------------------------------------------------------- #
if not self.recaptcha or not isinstance(self.recaptcha, dict) or not self.recaptcha.get('provider'):
sys.tracebacklimit = 0
raise CloudflareReCaptchaProvider(
self.simpleException(
CloudflareReCaptchaProvider,
"Cloudflare reCaptcha detected, unfortunately you haven't loaded an anti reCaptcha provider "
"correctly via the 'recaptcha' parameter."
)
@@ -448,8 +499,10 @@ class CloudScraper(Session):
if isinstance(delay, (int, float)):
self.delay = delay
except (AttributeError, ValueError):
sys.tracebacklimit = 0
raise CloudflareIUAMError("Cloudflare IUAM possibility malformed, issue extracing delay value.")
self.simpleException(
CloudflareIUAMError,
"Cloudflare IUAM possibility malformed, issue extracing delay value."
)
sleep(self.delay)
@@ -507,6 +560,7 @@ class CloudScraper(Session):
if not challengeSubmitResponse.is_redirect:
return challengeSubmitResponse
else:
cloudflare_kwargs = deepcopy(kwargs)
cloudflare_kwargs['headers'] = updateAttr(
@@ -535,6 +589,7 @@ class CloudScraper(Session):
# ------------------------------------------------------------------------------- #
return self.request(resp.request.method, resp.url, **kwargs)
# ------------------------------------------------------------------------------- #
@classmethod
@@ -587,8 +642,8 @@ class CloudScraper(Session):
cookie_domain = d
break
else:
sys.tracebacklimit = 0
raise CloudflareIUAMError(
cls.simpleException(
CloudflareIUAMError,
"Unable to find Cloudflare cookies. Does the site actually "
"have Cloudflare IUAM (I'm Under Attack Mode) enabled?"
)