Aggiornato cloudscraper

This commit is contained in:
Alhaziel01
2020-04-05 10:45:28 +02:00
parent 34be96127b
commit 55c6ac7c8f
7 changed files with 178 additions and 81 deletions

View File

@@ -54,7 +54,7 @@ except ImportError:
# ------------------------------------------------------------------------------- #
__version__ = '1.2.30'
__version__ = '1.2.32'
# ------------------------------------------------------------------------------- #
@@ -148,6 +148,15 @@ class CloudScraper(Session):
def __getstate__(self):
return self.__dict__
# ------------------------------------------------------------------------------- #
# Raise an Exception with no stacktrace and reset depth counter.
# ------------------------------------------------------------------------------- #
def simpleException(self, exception, msg):
    """Reset the solve-depth counter, silence tracebacks and raise ``exception(msg)``.

    :param exception: callable (normally an exception class) invoked with
        ``msg`` to build the error that is raised.
    :param msg: message handed to ``exception`` as its only argument.
    :raises exception: always -- this method never returns normally.

    Side effects: ``self._solveDepthCnt`` is set to 0 and the process-wide
    ``sys.tracebacklimit`` is set to 0 before the error is raised.

    NOTE(review): the ``@classmethod`` hunk later in this diff calls
    ``cls.simpleException(CloudflareIUAMError, "...")`` with two arguments;
    with this instance-method signature that call would bind the exception
    class to ``self`` and fail with a ``TypeError`` for the missing ``msg``
    -- confirm and fix at the call site.
    """
    # A raised error ends the current solve chain, so the next request
    # starts counting from zero again.
    self._solveDepthCnt = 0

    # Interpreter-wide setting: unhandled errors print without a stack trace.
    sys.tracebacklimit = 0

    error = exception(msg)
    raise error
# ------------------------------------------------------------------------------- #
# debug the request via the response
# ------------------------------------------------------------------------------- #
@@ -219,9 +228,8 @@ class CloudScraper(Session):
if self._solveDepthCnt >= self.solveDepth:
_ = self._solveDepthCnt
self._solveDepthCnt = 0
sys.tracebacklimit = 0
raise CloudflareLoopProtection(
self.simpleException(
CloudflareLoopProtection,
"!!Loop Protection!! We have tried to solve {} time(s) in a row.".format(_)
)
@@ -303,8 +311,10 @@ class CloudScraper(Session):
def is_Challenge_Request(self, resp):
if self.is_Firewall_Blocked(resp):
sys.tracebacklimit = 0
raise CloudflareCode1020('Cloudflare has blocked this request (Code 1020 Detected).')
self.simpleException(
CloudflareCode1020,
'Cloudflare has blocked this request (Code 1020 Detected).'
)
if self.is_reCaptcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
return True
@@ -317,16 +327,29 @@ class CloudScraper(Session):
def IUAM_Challenge_Response(self, body, url, interpreter):
try:
challengeUUID = re.search(
r'id="challenge-form" action="(?P<challengeUUID>\S+)"',
body, re.M | re.DOTALL
).groupdict().get('challengeUUID', '')
formPayload = re.search(
r'<form (?P<form>id="challenge-form" action="(?P<challengeUUID>.*?'
r'__cf_chl_jschl_tk__=\S+)"(.*?)</form>)',
body,
re.M | re.DOTALL
).groupdict()
payload = OrderedDict(re.findall(r'name="(r|jschl_vc|pass)"\svalue="(.*?)"', body))
if not all(key in formPayload for key in ['form', 'challengeUUID']):
self.simpleException(
CloudflareIUAMError,
"Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
)
payload = OrderedDict(
re.findall(
r'name="(r|jschl_vc|pass)"\svalue="(.*?)"',
formPayload['form']
)
)
except AttributeError:
sys.tracebacklimit = 0
raise CloudflareIUAMError(
self.simpleException(
CloudflareIUAMError,
"Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
)
@@ -337,8 +360,8 @@ class CloudScraper(Session):
interpreter
).solveChallenge(body, hostParsed.netloc)
except Exception as e:
sys.tracebacklimit = 0
raise CloudflareIUAMError(
self.simpleException(
CloudflareIUAMError,
'Unable to parse Cloudflare anti-bots page: {}'.format(
getattr(e, 'message', e)
)
@@ -348,7 +371,7 @@ class CloudScraper(Session):
'url': '{}://{}{}'.format(
hostParsed.scheme,
hostParsed.netloc,
self.unescape(challengeUUID)
self.unescape(formPayload['challengeUUID'])
),
'data': payload
}
@@ -359,34 +382,62 @@ class CloudScraper(Session):
def reCaptcha_Challenge_Response(self, provider, provider_params, body, url):
try:
payload = re.search(
r'(name="r"\svalue="(?P<r>\S+)"|).*?challenge-form" action="(?P<challengeUUID>\S+)".*?'
r'data-ray="(?P<data_ray>\S+)".*?data-sitekey="(?P<site_key>\S+)"',
body, re.M | re.DOTALL
formPayload = re.search(
r'<form class="challenge-form" (?P<form>id="challenge-form" '
r'action="(?P<challengeUUID>.*?__cf_chl_captcha_tk__=\S+)"(.*?)</form>)',
body,
re.M | re.DOTALL
).groupdict()
except (AttributeError):
sys.tracebacklimit = 0
raise CloudflareReCaptchaError(
if not all(key in formPayload for key in ['form', 'challengeUUID']):
self.simpleException(
CloudflareReCaptchaError,
"Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
)
payload = OrderedDict(
re.findall(
r'(name="r"\svalue|data-ray|data-sitekey|name="cf_captcha_kind"\svalue)="(.*?)"',
formPayload['form']
)
)
captchaType = 'reCaptcha' if payload['name="cf_captcha_kind" value'] == 're' else 'hCaptcha'
except (AttributeError, KeyError):
self.simpleException(
CloudflareReCaptchaError,
"Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
)
captchaResponse = reCaptcha.dynamicImport(
provider.lower()
).solveCaptcha(
captchaType,
url,
payload['data-sitekey'],
provider_params
)
dataPayload = OrderedDict([
('r', payload.get('name="r" value', '')),
('cf_captcha_kind', payload['name="cf_captcha_kind" value']),
('id', payload.get('data-ray')),
('g-recaptcha-response', captchaResponse)
])
if captchaType == 'hCaptcha':
dataPayload.update({'h-captcha-response': captchaResponse})
hostParsed = urlparse(url)
return {
'url': '{}://{}{}'.format(
hostParsed.scheme,
hostParsed.netloc,
self.unescape(payload.get('challengeUUID', ''))
self.unescape(formPayload['challengeUUID'])
),
'data': OrderedDict([
('r', payload.get('r', '')),
('id', payload.get('data_ray')),
(
'g-recaptcha-response',
reCaptcha.dynamicImport(
provider.lower()
).solveCaptcha(url, payload.get('site_key'), provider_params)
)
])
'data': dataPayload
}
# ------------------------------------------------------------------------------- #
@@ -412,8 +463,8 @@ class CloudScraper(Session):
# ------------------------------------------------------------------------------- #
if not self.recaptcha or not isinstance(self.recaptcha, dict) or not self.recaptcha.get('provider'):
sys.tracebacklimit = 0
raise CloudflareReCaptchaProvider(
self.simpleException(
CloudflareReCaptchaProvider,
"Cloudflare reCaptcha detected, unfortunately you haven't loaded an anti reCaptcha provider "
"correctly via the 'recaptcha' parameter."
)
@@ -448,8 +499,10 @@ class CloudScraper(Session):
if isinstance(delay, (int, float)):
self.delay = delay
except (AttributeError, ValueError):
sys.tracebacklimit = 0
raise CloudflareIUAMError("Cloudflare IUAM possibility malformed, issue extracing delay value.")
self.simpleException(
CloudflareIUAMError,
"Cloudflare IUAM possibility malformed, issue extracing delay value."
)
sleep(self.delay)
@@ -507,6 +560,7 @@ class CloudScraper(Session):
if not challengeSubmitResponse.is_redirect:
return challengeSubmitResponse
else:
cloudflare_kwargs = deepcopy(kwargs)
cloudflare_kwargs['headers'] = updateAttr(
@@ -535,6 +589,7 @@ class CloudScraper(Session):
# ------------------------------------------------------------------------------- #
return self.request(resp.request.method, resp.url, **kwargs)
# ------------------------------------------------------------------------------- #
@classmethod
@@ -587,8 +642,8 @@ class CloudScraper(Session):
cookie_domain = d
break
else:
sys.tracebacklimit = 0
raise CloudflareIUAMError(
cls.simpleException(
CloudflareIUAMError,
"Unable to find Cloudflare cookies. Does the site actually "
"have Cloudflare IUAM (I'm Under Attack Mode) enabled?"
)

View File

@@ -2,7 +2,6 @@ from __future__ import absolute_import
import requests
from ..exceptions import (
reCaptchaServiceUnavailable,
reCaptchaAPIError,
@@ -81,7 +80,7 @@ class captchaSolver(reCaptcha):
}
}
if response.json().get('status') is False and response.json().get('request') in errors.get(request_type):
if response.json().get('status') == 0 and response.json().get('request') in errors.get(request_type):
raise reCaptchaAPIError(
'{} {}'.format(
response.json().get('request'),
@@ -113,7 +112,8 @@ class captchaSolver(reCaptcha):
'action': 'reportbad',
'id': jobID,
'json': '1'
}
},
timeout=30
),
check_success=_checkRequest,
step=5,
@@ -149,7 +149,8 @@ class captchaSolver(reCaptcha):
'action': 'get',
'id': jobID,
'json': '1'
}
},
timeout=30
),
check_success=_checkRequest,
step=5,
@@ -165,7 +166,7 @@ class captchaSolver(reCaptcha):
# ------------------------------------------------------------------------------- #
def requestSolve(self, site_url, site_key):
def requestSolve(self, captchaType, url, siteKey):
def _checkRequest(response):
if response.ok and response.json().get("status") == 1 and response.json().get('request'):
return response
@@ -174,18 +175,29 @@ class captchaSolver(reCaptcha):
return None
# Fields common to every job submitted to the in.php endpoint.
data = {
    'key': self.api_key,
    'pageurl': url,
    'json': 1,
    'soft_id': 5507698
}

# Captcha-specific fields. The reCaptcha method name must be spelled
# 'userrecaptcha' (the value the previous implementation sent); the
# misspelled 'userrcaptcha' is not a valid method and the job is rejected.
# hCaptcha jobs use method 'hcaptcha' and carry the key in 'sitekey'.
if captchaType == 'reCaptcha':
    data.update({
        'method': 'userrecaptcha',
        'googlekey': siteKey
    })
else:
    data.update({
        'method': 'hcaptcha',
        'sitekey': siteKey
    })
response = polling.poll(
lambda: self.session.post(
'{}/in.php'.format(self.host),
data={
'key': self.api_key,
'method': 'userrecaptcha',
'googlekey': site_key,
'pageurl': site_url,
'json': '1',
'soft_id': '5507698'
},
allow_redirects=False
data=data,
allow_redirects=False,
timeout=30
),
check_success=_checkRequest,
step=5,
@@ -201,7 +213,7 @@ class captchaSolver(reCaptcha):
# ------------------------------------------------------------------------------- #
def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
jobID = None
if not reCaptchaParams.get('api_key'):
@@ -215,7 +227,7 @@ class captchaSolver(reCaptcha):
self.session.proxies = reCaptchaParams.get('proxies')
try:
jobID = self.requestSolve(site_url, site_key)
jobID = self.requestSolve(captchaType, url, siteKey)
return self.requestJob(jobID)
except polling.TimeoutException:
try:

View File

@@ -12,6 +12,7 @@ except ImportError:
)
from ..exceptions import (
reCaptchaException,
reCaptchaServiceUnavailable,
reCaptchaAPIError,
reCaptchaTimeout,
@@ -143,7 +144,7 @@ class captchaSolver(reCaptcha):
# ------------------------------------------------------------------------------- #
def requestSolve(self, site_url, site_key):
def requestSolve(self, url, siteKey):
def _checkRequest(response):
if response.ok and response.text.startswith('{') and response.json().get('captchaid'):
return response
@@ -159,9 +160,9 @@ class captchaSolver(reCaptcha):
'apikey': self.api_key,
'action': 'usercaptchaupload',
'interactive': 1,
'file-upload-01': site_key,
'file-upload-01': siteKey,
'oldsource': 'recaptchav2',
'pageurl': site_url,
'pageurl': url,
'maxtimeout': self.maxtimeout,
'json': 1
},
@@ -179,12 +180,17 @@ class captchaSolver(reCaptcha):
# ------------------------------------------------------------------------------- #
def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
jobID = None
if not reCaptchaParams.get('api_key'):
raise reCaptchaParameter("9kw: Missing api_key parameter.")
if captchaType == 'hCaptcha':
raise reCaptchaException(
'Provider does not support hCaptcha.'
)
self.api_key = reCaptchaParams.get('api_key')
if reCaptchaParams.get('maxtimeout'):
@@ -194,7 +200,7 @@ class captchaSolver(reCaptcha):
self.session.proxies = reCaptchaParams.get('proxies')
try:
jobID = self.requestSolve(site_url, site_key)
jobID = self.requestSolve(url, siteKey)
return self.requestJob(jobID)
except polling.TimeoutException:
raise reCaptchaTimeout(

View File

@@ -37,10 +37,10 @@ class reCaptcha(ABC):
# ------------------------------------------------------------------------------- #
@abc.abstractmethod
def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
pass
# ------------------------------------------------------------------------------- #
def solveCaptcha(self, site_url, site_key, reCaptchaParams):
return self.getCaptchaAnswer(site_url, site_key, reCaptchaParams)
def solveCaptcha(self, captchaType, url, siteKey, reCaptchaParams):
return self.getCaptchaAnswer(captchaType, url, siteKey, reCaptchaParams)

View File

@@ -1,16 +1,22 @@
from __future__ import absolute_import
from ..exceptions import reCaptchaParameter
from ..exceptions import (
reCaptchaParameter,
reCaptchaTimeout,
reCaptchaAPIError
)
try:
from python_anticaptcha import (
AnticaptchaClient,
NoCaptchaTaskProxylessTask
NoCaptchaTaskProxylessTask,
HCaptchaTaskProxyless,
AnticaptchaException
)
except ImportError:
raise ImportError(
"Please install the python module 'python_anticaptcha' via pip or download it from "
"https://github.com/ad-m/python-anticaptcha"
"Please install/upgrade the python module 'python_anticaptcha' via "
"pip install python-anticaptcha or https://github.com/ad-m/python-anticaptcha/"
)
from . import reCaptcha
@@ -23,7 +29,7 @@ class captchaSolver(reCaptcha):
# ------------------------------------------------------------------------------- #
def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
if not reCaptchaParams.get('api_key'):
raise reCaptchaParameter("anticaptcha: Missing api_key parameter.")
@@ -32,16 +38,30 @@ class captchaSolver(reCaptcha):
if reCaptchaParams.get('proxy'):
client.session.proxies = reCaptchaParams.get('proxies')
task = NoCaptchaTaskProxylessTask(site_url, site_key)
captchaMap = {
'reCaptcha': NoCaptchaTaskProxylessTask,
'hCaptcha': HCaptchaTaskProxyless
}
task = captchaMap[captchaType](url, siteKey)
if not hasattr(client, 'createTaskSmee'):
raise NotImplementedError(
"Please upgrade 'python_anticaptcha' via pip or download it from "
"https://github.com/ad-m/python-anticaptcha"
"https://github.com/ad-m/python-anticaptcha/tree/hcaptcha"
)
job = client.createTaskSmee(task)
return job.get_solution_response()
try:
job.join(maximum_time=180)
except (AnticaptchaException) as e:
raise reCaptchaTimeout('{}'.format(getattr(e, 'message', e)))
if 'solution' in job._last_result:
return job.get_solution_response()
else:
raise reCaptchaAPIError('Job did not return `solution` key in payload.')
# ------------------------------------------------------------------------------- #

View File

@@ -12,6 +12,7 @@ except ImportError:
)
from ..exceptions import (
reCaptchaException,
reCaptchaServiceUnavailable,
reCaptchaAccountError,
reCaptchaTimeout,
@@ -154,7 +155,7 @@ class captchaSolver(reCaptcha):
# ------------------------------------------------------------------------------- #
def requestSolve(self, site_url, site_key):
def requestSolve(self, url, siteKey):
def _checkRequest(response):
if response.ok and response.json().get("is_correct") and response.json().get('captcha'):
return response
@@ -172,8 +173,8 @@ class captchaSolver(reCaptcha):
'password': self.password,
'type': '4',
'token_params': json.dumps({
'googlekey': site_key,
'pageurl': site_url
'googlekey': siteKey,
'pageurl': url
})
},
allow_redirects=False
@@ -192,7 +193,7 @@ class captchaSolver(reCaptcha):
# ------------------------------------------------------------------------------- #
def getCaptchaAnswer(self, site_url, site_key, reCaptchaParams):
def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
jobID = None
for param in ['username', 'password']:
@@ -202,11 +203,16 @@ class captchaSolver(reCaptcha):
)
setattr(self, param, reCaptchaParams.get(param))
if captchaType == 'hCaptcha':
raise reCaptchaException(
'Provider does not support hCaptcha.'
)
if reCaptchaParams.get('proxy'):
self.session.proxies = reCaptchaParams.get('proxies')
try:
jobID = self.requestSolve(site_url, site_key)
jobID = self.requestSolve(url, siteKey)
return self.requestJob(jobID)
except polling.TimeoutException:
try:

View File

@@ -19,8 +19,7 @@
"ECDHE-RSA-CHACHA20-POLY1305",
"AES128-GCM-SHA256",
"AES256-GCM-SHA384",
"AES128-SHA",
"AES256-SHA"
"AES128-SHA"
],
"releases": {
"Chrome/50.0.0.0": {
@@ -12825,8 +12824,7 @@
"ECDHE-ECDSA-AES128-SHA",
"DHE-RSA-AES128-SHA",
"DHE-RSA-AES256-SHA",
"AES128-SHA",
"AES256-SHA"
"AES128-SHA"
],
"releases": {
"Firefox/50.0": {