piccoli aggiustamenti template tests, aggiornato cloudscraper

This commit is contained in:
marco
2020-08-15 11:17:59 +02:00
parent 1a5bb28891
commit 8505158a2f
6 changed files with 8060 additions and 13855 deletions

View File

@@ -10,9 +10,6 @@ host = support.config.get_channel_url()
headers = [['Referer', host]]
@support.menu
def mainlist(item):

View File

@@ -46,18 +46,19 @@ from .exceptions import (
CloudflareLoopProtection,
CloudflareCode1020,
CloudflareIUAMError,
CloudflareSolveError,
CloudflareChallengeError,
CloudflareReCaptchaError,
CloudflareReCaptchaProvider
CloudflareCaptchaError,
CloudflareCaptchaProvider
)
from .interpreters import JavaScriptInterpreter
from .reCaptcha import reCaptcha
from .captcha import Captcha
from .user_agent import User_Agent
# ------------------------------------------------------------------------------- #
__version__ = '1.2.40'
__version__ = '1.2.46'
# ------------------------------------------------------------------------------- #
@@ -70,12 +71,23 @@ class CipherSuiteAdapter(HTTPAdapter):
'config',
'_pool_connections',
'_pool_maxsize',
'_pool_block'
'_pool_block',
'source_address'
]
def __init__(self, *args, **kwargs):
self.ssl_context = kwargs.pop('ssl_context', None)
self.cipherSuite = kwargs.pop('cipherSuite', None)
self.source_address = kwargs.pop('source_address', None)
if self.source_address:
if isinstance(self.source_address, str):
self.source_address = (self.source_address, 0)
if not isinstance(self.source_address, tuple):
raise TypeError(
"source_address must be IP address string or (ip, port) tuple"
)
if not self.ssl_context:
self.ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
@@ -89,12 +101,14 @@ class CipherSuiteAdapter(HTTPAdapter):
def init_poolmanager(self, *args, **kwargs):
    """Create the pool manager using this adapter's TLS and bind settings.

    Any ``ssl_context`` or ``source_address`` keyword supplied by the
    caller is overwritten with the value stored on the adapter before
    the call is handed to the parent class implementation.
    """
    kwargs.update(
        ssl_context=self.ssl_context,
        source_address=self.source_address
    )
    return super(CipherSuiteAdapter, self).init_poolmanager(*args, **kwargs)
# ------------------------------------------------------------------------------- #
def proxy_manager_for(self, *args, **kwargs):
    """Create the proxy manager using this adapter's TLS and bind settings.

    Any ``ssl_context`` or ``source_address`` keyword supplied by the
    caller is overwritten with the value stored on the adapter before
    the call is handed to the parent class implementation.
    """
    kwargs.update(
        ssl_context=self.ssl_context,
        source_address=self.source_address
    )
    return super(CipherSuiteAdapter, self).proxy_manager_for(*args, **kwargs)
# ------------------------------------------------------------------------------- #
@@ -108,9 +122,11 @@ class CloudScraper(Session):
self.cipherSuite = kwargs.pop('cipherSuite', None)
self.ssl_context = kwargs.pop('ssl_context', None)
self.interpreter = kwargs.pop('interpreter', 'native')
self.recaptcha = kwargs.pop('recaptcha', {})
self.captcha = kwargs.pop('captcha', {})
self.requestPreHook = kwargs.pop('requestPreHook', None)
self.requestPostHook = kwargs.pop('requestPostHook', None)
self.source_address = kwargs.pop('source_address', None)
self.doubleDown = kwargs.pop('doubleDown', True)
self.allow_brotli = kwargs.pop(
'allow_brotli',
@@ -143,7 +159,8 @@ class CloudScraper(Session):
'https://',
CipherSuiteAdapter(
cipherSuite=self.cipherSuite,
ssl_context=self.ssl_context
ssl_context=self.ssl_context,
source_address=self.source_address
)
)
@@ -157,6 +174,13 @@ class CloudScraper(Session):
# State hook (the protocol method consulted by pickle/copy): hands back the
# instance attribute dictionary itself, not a copy, so every attribute set
# on the object is part of the returned state.
def __getstate__(self):
return self.__dict__
# ------------------------------------------------------------------------------- #
# Allow replacing actual web request call via subclassing
# ------------------------------------------------------------------------------- #
# Single point through which the actual HTTP call is issued: forwards
# method, url and all extra positional/keyword arguments unchanged to the
# parent class's request() and returns whatever that call returns.
# A subclass can override this method to substitute a different transport.
def perform_request(self, method, url, *args, **kwargs):
return super(CloudScraper, self).request(method, url, *args, **kwargs)
# ------------------------------------------------------------------------------- #
# Raise an Exception with no stacktrace and reset depth counter.
# ------------------------------------------------------------------------------- #
@@ -236,7 +260,7 @@ class CloudScraper(Session):
# ------------------------------------------------------------------------------- #
response = self.decodeBrotli(
super(CloudScraper, self).request(method, url, *args, **kwargs)
self.perform_request(method, url, *args, **kwargs)
)
# ------------------------------------------------------------------------------- #
@@ -314,6 +338,7 @@ class CloudScraper(Session):
resp.text,
re.M | re.S
)
and re.search(r'window._cf_chl_enter\(', resp.text, re.M | re.S)
)
except AttributeError:
pass
@@ -321,17 +346,38 @@ class CloudScraper(Session):
return False
# ------------------------------------------------------------------------------- #
# check if the response contains a valid Cloudflare reCaptcha challenge
# check if the response contains a v2 hCaptcha Cloudflare challenge
# ------------------------------------------------------------------------------- #
@staticmethod
def is_reCaptcha_Challenge(resp):
def is_New_Captcha_Challenge(resp):
    """Tell whether ``resp`` looks like a version 2 captcha challenge page.

    Three checks must all succeed, evaluated in order and stopping at the
    first failure:

    1. ``CloudScraper.is_Captcha_Challenge(resp)`` is truthy;
    2. the body references the captcha orchestrate script;
    3. the body contains a ``window._cf_chl_enter(`` call.

    Returns the (falsy) result of the first failing check, or the match
    object of the last check when all succeed. Returns ``False`` if an
    ``AttributeError`` is raised while inspecting ``resp``.
    """
    search_flags = re.M | re.S
    orchestrate_pattern = r'cpo.src\s*=\s*"/cdn-cgi/challenge-platform/orchestrate/captcha/v1"'

    try:
        captcha_detected = CloudScraper.is_Captcha_Challenge(resp)
        if not captcha_detected:
            return captcha_detected

        orchestrate_match = re.search(orchestrate_pattern, resp.text, search_flags)
        if not orchestrate_match:
            return orchestrate_match

        return re.search(r'window._cf_chl_enter\(', resp.text, search_flags)
    except AttributeError:
        # Response object lacks an expected attribute: treat as "not a challenge".
        return False
# ------------------------------------------------------------------------------- #
# check if the response contains a Cloudflare hCaptcha challenge
# ------------------------------------------------------------------------------- #
@staticmethod
def is_Captcha_Challenge(resp):
try:
return (
resp.headers.get('Server', '').startswith('cloudflare')
and resp.status_code == 403
and re.search(
r'action="/.*?__cf_chl_captcha_tk__=\S+".*?data\-sitekey=.*?',
r'action="/\S+__cf_chl_captcha_tk__=\S+',
resp.text,
re.M | re.DOTALL
)
@@ -363,7 +409,7 @@ class CloudScraper(Session):
return False
# ------------------------------------------------------------------------------- #
# Wrapper for is_reCaptcha_Challenge, is_IUAM_Challenge, is_Firewall_Blocked
# Wrapper for is_Captcha_Challenge, is_IUAM_Challenge, is_Firewall_Blocked
# ------------------------------------------------------------------------------- #
def is_Challenge_Request(self, resp):
@@ -373,15 +419,21 @@ class CloudScraper(Session):
'Cloudflare has blocked this request (Code 1020 Detected).'
)
if self.is_New_Captcha_Challenge(resp):
self.simpleException(
CloudflareChallengeError,
'Detected a Cloudflare version 2 challenge, This feature is not available in the opensource (free) version.'
)
if self.is_New_IUAM_Challenge(resp):
self.simpleException(
CloudflareChallengeError,
'Detected the new Cloudflare challenge.'
'Detected a Cloudflare version 2 Captcha challenge, This feature is not available in the opensource (free) version.'
)
if self.is_reCaptcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
if self.is_Captcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
if self.debug:
print('Detected Challenge.')
print('Detected a Cloudflare version 1 challenge.')
return True
return False
@@ -442,10 +494,10 @@ class CloudScraper(Session):
}
# ------------------------------------------------------------------------------- #
# Try to solve the reCaptcha challenge via 3rd party.
# Try to solve the Captcha challenge via 3rd party.
# ------------------------------------------------------------------------------- #
def reCaptcha_Challenge_Response(self, provider, provider_params, body, url):
def captcha_Challenge_Response(self, provider, provider_params, body, url):
try:
formPayload = re.search(
r'<form (?P<form>.*?="challenge-form" '
@@ -456,8 +508,8 @@ class CloudScraper(Session):
if not all(key in formPayload for key in ['form', 'challengeUUID']):
self.simpleException(
CloudflareReCaptchaError,
"Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
CloudflareCaptchaError,
"Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
)
payload = OrderedDict(
@@ -471,11 +523,28 @@ class CloudScraper(Session):
except (AttributeError, KeyError):
self.simpleException(
CloudflareReCaptchaError,
"Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
CloudflareCaptchaError,
"Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
)
captchaResponse = reCaptcha.dynamicImport(
# ------------------------------------------------------------------------------- #
# Pass proxy parameter to provider to solve captcha.
# ------------------------------------------------------------------------------- #
if self.proxies and self.proxies != self.captcha.get('proxy'):
self.captcha['proxy'] = self.proxies
# ------------------------------------------------------------------------------- #
# Pass User-Agent if provider supports it to solve captcha.
# ------------------------------------------------------------------------------- #
self.captcha['User-Agent'] = self.headers['User-Agent']
# ------------------------------------------------------------------------------- #
# Submit job to provider to request captcha solve.
# ------------------------------------------------------------------------------- #
captchaResponse = Captcha.dynamicImport(
provider.lower()
).solveCaptcha(
captchaType,
@@ -484,6 +553,10 @@ class CloudScraper(Session):
provider_params
)
# ------------------------------------------------------------------------------- #
# Parse and handle the response of solved captcha.
# ------------------------------------------------------------------------------- #
dataPayload = OrderedDict([
('r', payload.get('name="r" value', '')),
('cf_captcha_kind', payload['name="cf_captcha_kind" value']),
@@ -510,41 +583,45 @@ class CloudScraper(Session):
# ------------------------------------------------------------------------------- #
def Challenge_Response(self, resp, **kwargs):
if self.is_reCaptcha_Challenge(resp):
if self.is_Captcha_Challenge(resp):
# ------------------------------------------------------------------------------- #
# double down on the request as some websites are only checking
# if cfuid is populated before issuing reCaptcha.
# if cfuid is populated before issuing Captcha.
# ------------------------------------------------------------------------------- #
if self.doubleDown:
resp = self.decodeBrotli(
super(CloudScraper, self).request(resp.request.method, resp.url, **kwargs)
self.perform_request(resp.request.method, resp.url, **kwargs)
)
if not self.is_reCaptcha_Challenge(resp):
if not self.is_Captcha_Challenge(resp):
return resp
# ------------------------------------------------------------------------------- #
# if no reCaptcha provider raise a runtime error.
# if no captcha provider raise a runtime error.
# ------------------------------------------------------------------------------- #
if not self.recaptcha or not isinstance(self.recaptcha, dict) or not self.recaptcha.get('provider'):
if not self.captcha or not isinstance(self.captcha, dict) or not self.captcha.get('provider'):
self.simpleException(
CloudflareReCaptchaProvider,
"Cloudflare reCaptcha detected, unfortunately you haven't loaded an anti reCaptcha provider "
"correctly via the 'recaptcha' parameter."
CloudflareCaptchaProvider,
"Cloudflare Captcha detected, unfortunately you haven't loaded an anti Captcha provider "
"correctly via the 'captcha' parameter."
)
# ------------------------------------------------------------------------------- #
# if provider is return_response, return the response without doing anything.
# ------------------------------------------------------------------------------- #
if self.recaptcha.get('provider') == 'return_response':
if self.captcha.get('provider') == 'return_response':
return resp
self.recaptcha['proxies'] = self.proxies
submit_url = self.reCaptcha_Challenge_Response(
self.recaptcha.get('provider'),
self.recaptcha,
# ------------------------------------------------------------------------------- #
# Submit request to parser wrapper to solve captcha
# ------------------------------------------------------------------------------- #
submit_url = self.captcha_Challenge_Response(
self.captcha.get('provider'),
self.captcha,
resp.text,
resp.url
)
@@ -618,6 +695,12 @@ class CloudScraper(Session):
**cloudflare_kwargs
)
if challengeSubmitResponse.status_code == 400:
self.simpleException(
CloudflareSolveError,
'Invalid challenge answer detected, Cloudflare broken?'
)
# ------------------------------------------------------------------------------- #
# Return response if Cloudflare is doing content pass through instead of 3xx
# else request with redirect URL also handle protocol scheme change http -> https
@@ -686,7 +769,10 @@ class CloudScraper(Session):
'debug',
'delay',
'interpreter',
'recaptcha'
'captcha',
'requestPreHook',
'requestPostHook',
'source_address'
] if field in kwargs
}
)
@@ -738,7 +824,7 @@ if ssl.OPENSSL_VERSION_INFO < (1, 1, 1):
print(
"DEPRECATION: The OpenSSL being used by this python install ({}) does not meet the minimum supported "
"version (>= OpenSSL 1.1.1) in order to support TLS 1.3 required by Cloudflare, "
"You may encounter an unexpected reCaptcha or cloudflare 1020 blocks.".format(
"You may encounter an unexpected Captcha or cloudflare 1020 blocks.".format(
ssl.OPENSSL_VERSION
)
)

View File

@@ -48,64 +48,64 @@ class CloudflareSolveError(CloudflareException):
"""
class CloudflareReCaptchaError(CloudflareException):
class CloudflareCaptchaError(CloudflareException):
"""
Raise an error for problem extracting reCaptcha parameters
Raise an error for problem extracting Captcha parameters
from Cloudflare payload
"""
class CloudflareReCaptchaProvider(CloudflareException):
class CloudflareCaptchaProvider(CloudflareException):
"""
Raise an exception for no reCaptcha provider loaded for Cloudflare.
Raise an exception for no Captcha provider loaded for Cloudflare.
"""
# ------------------------------------------------------------------------------- #
class reCaptchaException(Exception):
class CaptchaException(Exception):
"""
Base exception class for cloudscraper reCaptcha Providers
Base exception class for cloudscraper captcha Providers
"""
class reCaptchaServiceUnavailable(reCaptchaException):
class CaptchaServiceUnavailable(CaptchaException):
"""
Raise an exception for external services that cannot be reached
"""
class reCaptchaAPIError(reCaptchaException):
class CaptchaAPIError(CaptchaException):
"""
Raise an error for error from API response.
"""
class reCaptchaAccountError(reCaptchaException):
class CaptchaAccountError(CaptchaException):
"""
Raise an error for reCaptcha provider account problem.
Raise an error for captcha provider account problem.
"""
class reCaptchaTimeout(reCaptchaException):
class CaptchaTimeout(CaptchaException):
"""
Raise an exception for reCaptcha provider taking too long.
Raise an exception for captcha provider taking too long.
"""
class reCaptchaParameter(reCaptchaException):
class CaptchaParameter(CaptchaException):
"""
Raise an exception for bad or missing Parameter.
"""
class reCaptchaBadJobID(reCaptchaException):
class CaptchaBadJobID(CaptchaException):
"""
Raise an exception for invalid job id.
"""
class reCaptchaReportError(reCaptchaException):
class CaptchaReportError(CaptchaException):
"""
Raise an error for reCaptcha provider unable to report bad solve.
Raise an error for captcha provider unable to report bad solve.
"""

View File

@@ -21,37 +21,29 @@ class User_Agent():
# ------------------------------------------------------------------------------- #
def loadHeaders(self, user_agents, user_agent_version):
    """Select the header set for the current browser and store it on ``self.headers``.

    Looks up ``user_agents[self.browser]['releases'][user_agent_version]['headers']``
    and uses it when it is truthy; otherwise falls back to the browser's
    ``'default_headers'`` entry.
    """
    browser_entry = user_agents.get(self.browser)
    release_headers = browser_entry.get('releases').get(user_agent_version).get('headers')
    # An absent or empty release-specific header set means "use the defaults".
    self.headers = release_headers or browser_entry.get('default_headers')
# ------------------------------------------------------------------------------- #
def filterAgents(self, releases):
def filterAgents(self, user_agents):
filtered = {}
for release in releases:
if self.mobile and releases[release]['User-Agent']['mobile']:
filtered[release] = filtered.get(release, []) + releases[release]['User-Agent']['mobile']
if self.mobile:
if self.platform in user_agents['mobile'] and user_agents['mobile'][self.platform]:
filtered.update(user_agents['mobile'][self.platform])
if self.desktop and releases[release]['User-Agent']['desktop']:
filtered[release] = filtered.get(release, []) + releases[release]['User-Agent']['desktop']
if self.desktop:
if self.platform in user_agents['desktop'] and user_agents['desktop'][self.platform]:
filtered.update(user_agents['desktop'][self.platform])
return filtered
# ------------------------------------------------------------------------------- #
def tryMatchCustom(self, user_agents):
for browser in user_agents:
for release in user_agents[browser]['releases']:
for platform in ['mobile', 'desktop']:
if re.search(re.escape(self.custom), ' '.join(user_agents[browser]['releases'][release]['User-Agent'][platform])):
self.browser = browser
self.loadHeaders(user_agents, release)
for device_type in user_agents['user_agents']:
for platform in user_agents['user_agents'][device_type]:
for browser in user_agents['user_agents'][device_type][platform]:
if re.search(re.escape(self.custom), ' '.join(user_agents['user_agents'][device_type][platform][browser])):
self.headers = user_agents['headers'][browser]
self.headers['User-Agent'] = self.custom
self.cipherSuite = user_agents[self.browser].get('cipherSuite', [])
self.cipherSuite = user_agents['cipherSuite'][browser]
return True
return False
@@ -60,13 +52,18 @@ class User_Agent():
def loadUserAgent(self, *args, **kwargs):
self.browser = kwargs.pop('browser', None)
self.platforms = ['linux', 'windows', 'darwin', 'android', 'ios']
self.browsers = ['chrome', 'firefox']
if isinstance(self.browser, dict):
self.custom = self.browser.get('custom', None)
self.platform = self.browser.get('platform', None)
self.desktop = self.browser.get('desktop', True)
self.mobile = self.browser.get('mobile', True)
self.browser = self.browser.get('browser', None)
else:
self.custom = kwargs.pop('custom', None)
self.platform = kwargs.pop('platform', None)
self.desktop = kwargs.pop('desktop', True)
self.mobile = kwargs.pop('mobile', True)
@@ -94,22 +91,32 @@ class User_Agent():
('Accept-Encoding', 'gzip, deflate, br')
])
else:
if self.browser and not user_agents.get(self.browser):
if self.browser and self.browser not in self.browsers:
sys.tracebacklimit = 0
raise RuntimeError('Sorry "{}" browser User-Agent was not found.'.format(self.browser))
raise RuntimeError('Sorry "{}" browser is not valid, valid browsers are [{}].'.format(self.browser, ', '.join(self.browsers)))
if not self.platform:
self.platform = random.SystemRandom().choice(self.platforms)
if self.platform not in self.platforms:
sys.tracebacklimit = 0
raise RuntimeError('Sorry the platform "{}" is not valid, valid platforms are [{}]'.format(self.platform, ', '.join(self.platforms)))
filteredAgents = self.filterAgents(user_agents['user_agents'])
if not self.browser:
self.browser = random.SystemRandom().choice(list(user_agents))
# has to be at least one in there...
while not filteredAgents.get(self.browser):
self.browser = random.SystemRandom().choice(list(filteredAgents.keys()))
self.cipherSuite = user_agents.get(self.browser).get('cipherSuite', [])
if not filteredAgents[self.browser]:
sys.tracebacklimit = 0
raise RuntimeError('Sorry "{}" browser was not found with a platform of "{}".'.format(self.browser, self.platform))
filteredAgents = self.filterAgents(user_agents.get(self.browser).get('releases'))
self.cipherSuite = user_agents['cipherSuite'][self.browser]
self.headers = user_agents['headers'][self.browser]
user_agent_version = random.SystemRandom().choice(list(filteredAgents))
self.loadHeaders(user_agents, user_agent_version)
self.headers['User-Agent'] = random.SystemRandom().choice(filteredAgents[user_agent_version])
self.headers['User-Agent'] = random.SystemRandom().choice(filteredAgents[self.browser])
if not kwargs.get('allow_brotli', False) and 'br' in self.headers['Accept-Encoding']:
self.headers['Accept-Encoding'] = ','.join([

File diff suppressed because it is too large Load Diff

View File

@@ -55,7 +55,7 @@
{%- if (test_case.stdout or test_case.err or test_case.err) and test_case.outcome != test_case.SKIP %}
<tr style="display:none;">
<td class="col-xs-9" colspan="3">
{%- if test_case.stdout %}<p>{{ test_case.stdout|e }}</p>{% endif %}
{%- if test_case.stdout %}<p style="white-space: pre-line;">{{ test_case.stdout|e }}</p>{% endif %}
{%- if test_case.err %}<p style="color:maroon;">{{ test_case.err[0].__name__ }}: {{ test_case.err[1] }}</p>{% endif %}
{%- if test_case.err %}<p style="color:maroon;">{{ test_case.test_exception_info }}</p>{% endif %}
</td>
@@ -64,7 +64,7 @@
{%- if (test_case.stdout or test_case.err or test_case.err) and test_case.outcome == test_case.SKIP %}
<tr style="display:none;">
<td class="col-xs-9" colspan="3">
{%- if test_case.stdout %}<p>{{ test_case.stdout|e }}</p>{% endif %}
{%- if test_case.stdout %}<p style="white-space: pre-line;">{{ test_case.stdout|e }}</p>{% endif %}
{%- if test_case.err %}<p style="color:maroon;">{{ test_case.err }}</p>{% endif %}
</td>
</tr>
@@ -164,6 +164,10 @@
} else {
$(this).show()
}
$(this).find(".btn").each(function(){
$(this).text("View")
});
});
}
$('#showPassed').on('click', function(){