Aggiornamento Cloudscraper

This commit is contained in:
Alhaziel
2020-01-13 16:06:40 +01:00
parent c5cb55bf5e
commit 7e4371f482
+57 -13
View File
@@ -1,4 +1,3 @@
# https://github.com/VeNoMouS/cloudscraper
import logging import logging
import re import re
import sys import sys
@@ -38,7 +37,7 @@ except ImportError:
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
__version__ = '1.2.16' __version__ = '1.2.19'
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
@@ -163,7 +162,6 @@ class CloudScraper(Session):
def request(self, method, url, *args, **kwargs): def request(self, method, url, *args, **kwargs):
# pylint: disable=E0203 # pylint: disable=E0203
if kwargs.get('proxies') and kwargs.get('proxies') != self.proxies: if kwargs.get('proxies') and kwargs.get('proxies') != self.proxies:
self.proxies = kwargs.get('proxies') self.proxies = kwargs.get('proxies')
@@ -198,6 +196,7 @@ class CloudScraper(Session):
else: else:
if not resp.is_redirect and resp.status_code not in [429, 503]: if not resp.is_redirect and resp.status_code not in [429, 503]:
self._solveDepthCnt = 0 self._solveDepthCnt = 0
return resp return resp
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
@@ -243,7 +242,7 @@ class CloudScraper(Session):
return False return False
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
# check if the response contains a valid Cloudflare reCaptcha challenge # check if the response contains Firewall 1020 Error
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
@staticmethod @staticmethod
@@ -270,10 +269,7 @@ class CloudScraper(Session):
def is_Challenge_Request(self, resp): def is_Challenge_Request(self, resp):
if self.is_Firewall_Blocked(resp): if self.is_Firewall_Blocked(resp):
sys.tracebacklimit = 0 sys.tracebacklimit = 0
raise RuntimeError( raise RuntimeError('Cloudflare has blocked this request (Code 1020 Detected).')
'Cloudflare has a restriction on your IP (Code 1020 Detected), '
'you are BLOCKED.'
)
if self.is_reCaptcha_Challenge(resp) or self.is_IUAM_Challenge(resp): if self.is_reCaptcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
return True return True
@@ -434,6 +430,7 @@ class CloudScraper(Session):
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
if submit_url: if submit_url:
def updateAttr(obj, name, newValue): def updateAttr(obj, name, newValue):
try: try:
obj[name].update(newValue) obj[name].update(newValue)
@@ -450,13 +447,18 @@ class CloudScraper(Session):
'data', 'data',
submit_url['data'] submit_url['data']
) )
urlParsed = urlparse(resp.url)
cloudflare_kwargs['headers'] = updateAttr( cloudflare_kwargs['headers'] = updateAttr(
cloudflare_kwargs, cloudflare_kwargs,
'headers', 'headers',
{'Referer': resp.url} {
'Origin': '{}://{}'.format(urlParsed.scheme, urlParsed.netloc),
'Referer': resp.url
}
) )
ret = self.request( challengeSubmitResponse = self.request(
'POST', 'POST',
submit_url['url'], submit_url['url'],
**cloudflare_kwargs **cloudflare_kwargs
@@ -464,13 +466,44 @@ class CloudScraper(Session):
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
# Return response if Cloudflare is doing content pass through instead of 3xx # Return response if Cloudflare is doing content pass through instead of 3xx
# otherwise follow the redirect URL, also handling a protocol scheme change (http -> https)
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
if not ret.is_redirect: if not challengeSubmitResponse.is_redirect:
return ret return challengeSubmitResponse
else:
cloudflare_kwargs = deepcopy(kwargs)
if not urlparse(challengeSubmitResponse.headers['Location']).netloc:
cloudflare_kwargs['headers'] = updateAttr(
cloudflare_kwargs,
'headers',
{'Referer': '{}://{}'.format(urlParsed.scheme, urlParsed.netloc)}
)
return self.request(
resp.request.method,
'{}://{}{}'.format(
urlParsed.scheme,
urlParsed.netloc,
challengeSubmitResponse.headers['Location']
),
**cloudflare_kwargs
)
else:
redirectParsed = urlparse(challengeSubmitResponse.headers['Location'])
cloudflare_kwargs['headers'] = updateAttr(
cloudflare_kwargs,
'headers',
{'Referer': '{}://{}'.format(redirectParsed.scheme, redirectParsed.netloc)}
)
return self.request(
resp.request.method,
challengeSubmitResponse.headers['Location'],
**cloudflare_kwargs
)
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
# Cloudflare is doing http 3xx instead of pass through again.... # We shouldn't be here...
# Re-request the original query and/or process again.... # Re-request the original query and/or process again....
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
@@ -554,6 +587,17 @@ class CloudScraper(Session):
# ------------------------------------------------------------------------------- # # ------------------------------------------------------------------------------- #
# Import-time sanity check: when the interpreter is linked against an OpenSSL
# older than 1.1.1, print a one-off notice (to stdout) that names the detected
# OpenSSL build. The notice text itself explains the consequence to the user.
if ssl.OPENSSL_VERSION_INFO < (1, 1, 1):
    print(
        (
            "DEPRECATION: The OpenSSL being used by this python install ({}) does not meet the minimum supported "
            "version (>= OpenSSL 1.1.1) in order to support TLS 1.3 required by Cloudflare, "
            "You may encounter an unexpected reCaptcha or cloudflare 1020 blocks."
        ).format(ssl.OPENSSL_VERSION)
    )
# ------------------------------------------------------------------------------- #
create_scraper = CloudScraper.create_scraper create_scraper = CloudScraper.create_scraper
get_tokens = CloudScraper.get_tokens get_tokens = CloudScraper.get_tokens
get_cookie_string = CloudScraper.get_cookie_string get_cookie_string = CloudScraper.get_cookie_string