KoD 1.4
- completato il supporto al futuro Kodi 19\n- ridisegnato infoplus\n- fix vari ed eventuali\n
This commit is contained in:
@@ -46,18 +46,19 @@ from .exceptions import (
|
||||
CloudflareLoopProtection,
|
||||
CloudflareCode1020,
|
||||
CloudflareIUAMError,
|
||||
CloudflareSolveError,
|
||||
CloudflareChallengeError,
|
||||
CloudflareReCaptchaError,
|
||||
CloudflareReCaptchaProvider
|
||||
CloudflareCaptchaError,
|
||||
CloudflareCaptchaProvider
|
||||
)
|
||||
|
||||
from .interpreters import JavaScriptInterpreter
|
||||
from .reCaptcha import reCaptcha
|
||||
from .captcha import Captcha
|
||||
from .user_agent import User_Agent
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
__version__ = '1.2.40'
|
||||
__version__ = '1.2.46'
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
@@ -70,12 +71,23 @@ class CipherSuiteAdapter(HTTPAdapter):
|
||||
'config',
|
||||
'_pool_connections',
|
||||
'_pool_maxsize',
|
||||
'_pool_block'
|
||||
'_pool_block',
|
||||
'source_address'
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.ssl_context = kwargs.pop('ssl_context', None)
|
||||
self.cipherSuite = kwargs.pop('cipherSuite', None)
|
||||
self.source_address = kwargs.pop('source_address', None)
|
||||
|
||||
if self.source_address:
|
||||
if isinstance(self.source_address, str):
|
||||
self.source_address = (self.source_address, 0)
|
||||
|
||||
if not isinstance(self.source_address, tuple):
|
||||
raise TypeError(
|
||||
"source_address must be IP address string or (ip, port) tuple"
|
||||
)
|
||||
|
||||
if not self.ssl_context:
|
||||
self.ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
|
||||
@@ -89,12 +101,14 @@ class CipherSuiteAdapter(HTTPAdapter):
|
||||
|
||||
def init_poolmanager(self, *args, **kwargs):
|
||||
kwargs['ssl_context'] = self.ssl_context
|
||||
kwargs['source_address'] = self.source_address
|
||||
return super(CipherSuiteAdapter, self).init_poolmanager(*args, **kwargs)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def proxy_manager_for(self, *args, **kwargs):
|
||||
kwargs['ssl_context'] = self.ssl_context
|
||||
kwargs['source_address'] = self.source_address
|
||||
return super(CipherSuiteAdapter, self).proxy_manager_for(*args, **kwargs)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
@@ -108,9 +122,11 @@ class CloudScraper(Session):
|
||||
self.cipherSuite = kwargs.pop('cipherSuite', None)
|
||||
self.ssl_context = kwargs.pop('ssl_context', None)
|
||||
self.interpreter = kwargs.pop('interpreter', 'native')
|
||||
self.recaptcha = kwargs.pop('recaptcha', {})
|
||||
self.captcha = kwargs.pop('captcha', {})
|
||||
self.requestPreHook = kwargs.pop('requestPreHook', None)
|
||||
self.requestPostHook = kwargs.pop('requestPostHook', None)
|
||||
self.source_address = kwargs.pop('source_address', None)
|
||||
self.doubleDown = kwargs.pop('doubleDown', True)
|
||||
|
||||
self.allow_brotli = kwargs.pop(
|
||||
'allow_brotli',
|
||||
@@ -143,7 +159,8 @@ class CloudScraper(Session):
|
||||
'https://',
|
||||
CipherSuiteAdapter(
|
||||
cipherSuite=self.cipherSuite,
|
||||
ssl_context=self.ssl_context
|
||||
ssl_context=self.ssl_context,
|
||||
source_address=self.source_address
|
||||
)
|
||||
)
|
||||
|
||||
@@ -157,6 +174,13 @@ class CloudScraper(Session):
|
||||
def __getstate__(self):
|
||||
return self.__dict__
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Allow replacing actual web request call via subclassing
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def perform_request(self, method, url, *args, **kwargs):
|
||||
return super(CloudScraper, self).request(method, url, *args, **kwargs)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Raise an Exception with no stacktrace and reset depth counter.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
@@ -236,7 +260,7 @@ class CloudScraper(Session):
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
response = self.decodeBrotli(
|
||||
super(CloudScraper, self).request(method, url, *args, **kwargs)
|
||||
self.perform_request(method, url, *args, **kwargs)
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
@@ -314,6 +338,7 @@ class CloudScraper(Session):
|
||||
resp.text,
|
||||
re.M | re.S
|
||||
)
|
||||
and re.search(r'window._cf_chl_enter\(', resp.text, re.M | re.S)
|
||||
)
|
||||
except AttributeError:
|
||||
pass
|
||||
@@ -321,17 +346,38 @@ class CloudScraper(Session):
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# check if the response contains a valid Cloudflare reCaptcha challenge
|
||||
# check if the response contains a v2 hCaptcha Cloudflare challenge
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
@staticmethod
|
||||
def is_reCaptcha_Challenge(resp):
|
||||
def is_New_Captcha_Challenge(resp):
|
||||
try:
|
||||
return (
|
||||
CloudScraper.is_Captcha_Challenge(resp)
|
||||
and re.search(
|
||||
r'cpo.src\s*=\s*"/cdn-cgi/challenge-platform/orchestrate/captcha/v1"',
|
||||
resp.text,
|
||||
re.M | re.S
|
||||
)
|
||||
and re.search(r'window._cf_chl_enter\(', resp.text, re.M | re.S)
|
||||
)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# check if the response contains a Cloudflare hCaptcha challenge
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
@staticmethod
|
||||
def is_Captcha_Challenge(resp):
|
||||
try:
|
||||
return (
|
||||
resp.headers.get('Server', '').startswith('cloudflare')
|
||||
and resp.status_code == 403
|
||||
and re.search(
|
||||
r'action="/.*?__cf_chl_captcha_tk__=\S+".*?data\-sitekey=.*?',
|
||||
r'action="/\S+__cf_chl_captcha_tk__=\S+',
|
||||
resp.text,
|
||||
re.M | re.DOTALL
|
||||
)
|
||||
@@ -363,7 +409,7 @@ class CloudScraper(Session):
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Wrapper for is_reCaptcha_Challenge, is_IUAM_Challenge, is_Firewall_Blocked
|
||||
# Wrapper for is_Captcha_Challenge, is_IUAM_Challenge, is_Firewall_Blocked
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def is_Challenge_Request(self, resp):
|
||||
@@ -373,15 +419,21 @@ class CloudScraper(Session):
|
||||
'Cloudflare has blocked this request (Code 1020 Detected).'
|
||||
)
|
||||
|
||||
if self.is_New_Captcha_Challenge(resp):
|
||||
self.simpleException(
|
||||
CloudflareChallengeError,
|
||||
'Detected a Cloudflare version 2 challenge, This feature is not available in the opensource (free) version.'
|
||||
)
|
||||
|
||||
if self.is_New_IUAM_Challenge(resp):
|
||||
self.simpleException(
|
||||
CloudflareChallengeError,
|
||||
'Detected the new Cloudflare challenge.'
|
||||
'Detected a Cloudflare version 2 Captcha challenge, This feature is not available in the opensource (free) version.'
|
||||
)
|
||||
|
||||
if self.is_reCaptcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
|
||||
if self.is_Captcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
|
||||
if self.debug:
|
||||
print('Detected Challenge.')
|
||||
print('Detected a Cloudflare version 1 challenge.')
|
||||
return True
|
||||
|
||||
return False
|
||||
@@ -442,10 +494,10 @@ class CloudScraper(Session):
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Try to solve the reCaptcha challenge via 3rd party.
|
||||
# Try to solve the Captcha challenge via 3rd party.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def reCaptcha_Challenge_Response(self, provider, provider_params, body, url):
|
||||
def captcha_Challenge_Response(self, provider, provider_params, body, url):
|
||||
try:
|
||||
formPayload = re.search(
|
||||
r'<form (?P<form>.*?="challenge-form" '
|
||||
@@ -456,8 +508,8 @@ class CloudScraper(Session):
|
||||
|
||||
if not all(key in formPayload for key in ['form', 'challengeUUID']):
|
||||
self.simpleException(
|
||||
CloudflareReCaptchaError,
|
||||
"Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
|
||||
CloudflareCaptchaError,
|
||||
"Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
|
||||
)
|
||||
|
||||
payload = OrderedDict(
|
||||
@@ -471,11 +523,28 @@ class CloudScraper(Session):
|
||||
|
||||
except (AttributeError, KeyError):
|
||||
self.simpleException(
|
||||
CloudflareReCaptchaError,
|
||||
"Cloudflare reCaptcha detected, unfortunately we can't extract the parameters correctly."
|
||||
CloudflareCaptchaError,
|
||||
"Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
|
||||
)
|
||||
|
||||
captchaResponse = reCaptcha.dynamicImport(
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Pass proxy parameter to provider to solve captcha.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
if self.proxies and self.proxies != self.captcha.get('proxy'):
|
||||
self.captcha['proxy'] = self.proxies
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Pass User-Agent if provider supports it to solve captcha.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
self.captcha['User-Agent'] = self.headers['User-Agent']
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Submit job to provider to request captcha solve.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
captchaResponse = Captcha.dynamicImport(
|
||||
provider.lower()
|
||||
).solveCaptcha(
|
||||
captchaType,
|
||||
@@ -484,6 +553,10 @@ class CloudScraper(Session):
|
||||
provider_params
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Parse and handle the response of solved captcha.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
dataPayload = OrderedDict([
|
||||
('r', payload.get('name="r" value', '')),
|
||||
('cf_captcha_kind', payload['name="cf_captcha_kind" value']),
|
||||
@@ -510,41 +583,45 @@ class CloudScraper(Session):
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def Challenge_Response(self, resp, **kwargs):
|
||||
if self.is_reCaptcha_Challenge(resp):
|
||||
if self.is_Captcha_Challenge(resp):
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# double down on the request as some websites are only checking
|
||||
# if cfuid is populated before issuing reCaptcha.
|
||||
# if cfuid is populated before issuing Captcha.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
resp = self.decodeBrotli(
|
||||
super(CloudScraper, self).request(resp.request.method, resp.url, **kwargs)
|
||||
)
|
||||
if self.doubleDown:
|
||||
resp = self.decodeBrotli(
|
||||
self.perform_request(resp.request.method, resp.url, **kwargs)
|
||||
)
|
||||
|
||||
if not self.is_reCaptcha_Challenge(resp):
|
||||
if not self.is_Captcha_Challenge(resp):
|
||||
return resp
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# if no reCaptcha provider raise a runtime error.
|
||||
# if no captcha provider raise a runtime error.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
if not self.recaptcha or not isinstance(self.recaptcha, dict) or not self.recaptcha.get('provider'):
|
||||
if not self.captcha or not isinstance(self.captcha, dict) or not self.captcha.get('provider'):
|
||||
self.simpleException(
|
||||
CloudflareReCaptchaProvider,
|
||||
"Cloudflare reCaptcha detected, unfortunately you haven't loaded an anti reCaptcha provider "
|
||||
"correctly via the 'recaptcha' parameter."
|
||||
CloudflareCaptchaProvider,
|
||||
"Cloudflare Captcha detected, unfortunately you haven't loaded an anti Captcha provider "
|
||||
"correctly via the 'captcha' parameter."
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# if provider is return_response, return the response without doing anything.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
if self.recaptcha.get('provider') == 'return_response':
|
||||
if self.captcha.get('provider') == 'return_response':
|
||||
return resp
|
||||
|
||||
self.recaptcha['proxies'] = self.proxies
|
||||
submit_url = self.reCaptcha_Challenge_Response(
|
||||
self.recaptcha.get('provider'),
|
||||
self.recaptcha,
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Submit request to parser wrapper to solve captcha
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
submit_url = self.captcha_Challenge_Response(
|
||||
self.captcha.get('provider'),
|
||||
self.captcha,
|
||||
resp.text,
|
||||
resp.url
|
||||
)
|
||||
@@ -618,6 +695,12 @@ class CloudScraper(Session):
|
||||
**cloudflare_kwargs
|
||||
)
|
||||
|
||||
if challengeSubmitResponse.status_code == 400:
|
||||
self.simpleException(
|
||||
CloudflareSolveError,
|
||||
'Invalid challenge answer detected, Cloudflare broken?'
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Return response if Cloudflare is doing content pass through instead of 3xx
|
||||
# else request with redirect URL also handle protocol scheme change http -> https
|
||||
@@ -686,7 +769,10 @@ class CloudScraper(Session):
|
||||
'debug',
|
||||
'delay',
|
||||
'interpreter',
|
||||
'recaptcha'
|
||||
'captcha',
|
||||
'requestPreHook',
|
||||
'requestPostHook',
|
||||
'source_address'
|
||||
] if field in kwargs
|
||||
}
|
||||
)
|
||||
@@ -738,7 +824,7 @@ if ssl.OPENSSL_VERSION_INFO < (1, 1, 1):
|
||||
print(
|
||||
"DEPRECATION: The OpenSSL being used by this python install ({}) does not meet the minimum supported "
|
||||
"version (>= OpenSSL 1.1.1) in order to support TLS 1.3 required by Cloudflare, "
|
||||
"You may encounter an unexpected reCaptcha or cloudflare 1020 blocks.".format(
|
||||
"You may encounter an unexpected Captcha or cloudflare 1020 blocks.".format(
|
||||
ssl.OPENSSL_VERSION
|
||||
)
|
||||
)
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import requests
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from ..exceptions import (
|
||||
reCaptchaServiceUnavailable,
|
||||
reCaptchaAPIError,
|
||||
reCaptchaTimeout,
|
||||
reCaptchaParameter,
|
||||
reCaptchaBadJobID,
|
||||
reCaptchaReportError
|
||||
CaptchaServiceUnavailable,
|
||||
CaptchaAPIError,
|
||||
CaptchaTimeout,
|
||||
CaptchaParameter,
|
||||
CaptchaBadJobID,
|
||||
CaptchaReportError
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -19,10 +23,10 @@ except ImportError:
|
||||
"https://github.com/justiniso/polling/"
|
||||
)
|
||||
|
||||
from . import reCaptcha
|
||||
from . import Captcha
|
||||
|
||||
|
||||
class captchaSolver(reCaptcha):
|
||||
class captchaSolver(Captcha):
|
||||
|
||||
def __init__(self):
|
||||
super(captchaSolver, self).__init__('2captcha')
|
||||
@@ -34,7 +38,7 @@ class captchaSolver(reCaptcha):
|
||||
@staticmethod
|
||||
def checkErrorStatus(response, request_type):
|
||||
if response.status_code in [500, 502]:
|
||||
raise reCaptchaServiceUnavailable('2Captcha: Server Side Error {}'.format(response.status_code))
|
||||
raise CaptchaServiceUnavailable('2Captcha: Server Side Error {}'.format(response.status_code))
|
||||
|
||||
errors = {
|
||||
'in.php': {
|
||||
@@ -81,7 +85,7 @@ class captchaSolver(reCaptcha):
|
||||
}
|
||||
|
||||
if response.json().get('status') == 0 and response.json().get('request') in errors.get(request_type):
|
||||
raise reCaptchaAPIError(
|
||||
raise CaptchaAPIError(
|
||||
'{} {}'.format(
|
||||
response.json().get('request'),
|
||||
errors.get(request_type).get(response.json().get('request'))
|
||||
@@ -92,8 +96,8 @@ class captchaSolver(reCaptcha):
|
||||
|
||||
def reportJob(self, jobID):
|
||||
if not jobID:
|
||||
raise reCaptchaBadJobID(
|
||||
"2Captcha: Error bad job id to request reCaptcha."
|
||||
raise CaptchaBadJobID(
|
||||
"2Captcha: Error bad job id to request Captcha."
|
||||
)
|
||||
|
||||
def _checkRequest(response):
|
||||
@@ -123,15 +127,15 @@ class captchaSolver(reCaptcha):
|
||||
if response:
|
||||
return True
|
||||
else:
|
||||
raise reCaptchaReportError(
|
||||
"2Captcha: Error - Failed to report bad reCaptcha solve."
|
||||
raise CaptchaReportError(
|
||||
"2Captcha: Error - Failed to report bad Captcha solve."
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def requestJob(self, jobID):
|
||||
if not jobID:
|
||||
raise reCaptchaBadJobID("2Captcha: Error bad job id to request reCaptcha.")
|
||||
raise CaptchaBadJobID("2Captcha: Error bad job id to request Captcha.")
|
||||
|
||||
def _checkRequest(response):
|
||||
if response.ok and response.json().get('status') == 1:
|
||||
@@ -160,8 +164,8 @@ class captchaSolver(reCaptcha):
|
||||
if response:
|
||||
return response.json().get('request')
|
||||
else:
|
||||
raise reCaptchaTimeout(
|
||||
"2Captcha: Error failed to solve reCaptcha."
|
||||
raise CaptchaTimeout(
|
||||
"2Captcha: Error failed to solve Captcha."
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
@@ -192,6 +196,14 @@ class captchaSolver(reCaptcha):
|
||||
}
|
||||
)
|
||||
|
||||
if self.proxy:
|
||||
data.update(
|
||||
{
|
||||
'proxy': self.proxy,
|
||||
'proxytype': self.proxyType
|
||||
}
|
||||
)
|
||||
|
||||
response = polling.poll(
|
||||
lambda: self.session.post(
|
||||
'{}/in.php'.format(self.host),
|
||||
@@ -207,24 +219,35 @@ class captchaSolver(reCaptcha):
|
||||
if response:
|
||||
return response.json().get('request')
|
||||
else:
|
||||
raise reCaptchaBadJobID(
|
||||
raise CaptchaBadJobID(
|
||||
'2Captcha: Error no job id was returned.'
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
|
||||
def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
|
||||
jobID = None
|
||||
|
||||
if not reCaptchaParams.get('api_key'):
|
||||
raise reCaptchaParameter(
|
||||
if not captchaParams.get('api_key'):
|
||||
raise CaptchaParameter(
|
||||
"2Captcha: Missing api_key parameter."
|
||||
)
|
||||
|
||||
self.api_key = reCaptchaParams.get('api_key')
|
||||
self.api_key = captchaParams.get('api_key')
|
||||
|
||||
if reCaptchaParams.get('proxy'):
|
||||
self.session.proxies = reCaptchaParams.get('proxies')
|
||||
if captchaParams.get('proxy') and not captchaParams.get('no_proxy'):
|
||||
hostParsed = urlparse(captchaParams.get('proxy', {}).get('https'))
|
||||
|
||||
if not hostParsed.scheme:
|
||||
raise CaptchaParameter('Cannot parse proxy correctly, bad scheme')
|
||||
|
||||
if not hostParsed.netloc:
|
||||
raise CaptchaParameter('Cannot parse proxy correctly, bad netloc')
|
||||
|
||||
self.proxyType = hostParsed.scheme
|
||||
self.proxy = hostParsed.netloc
|
||||
else:
|
||||
self.proxy = None
|
||||
|
||||
try:
|
||||
jobID = self.requestSolve(captchaType, url, siteKey)
|
||||
@@ -234,12 +257,12 @@ class captchaSolver(reCaptcha):
|
||||
if jobID:
|
||||
self.reportJob(jobID)
|
||||
except polling.TimeoutException:
|
||||
raise reCaptchaTimeout(
|
||||
"2Captcha: reCaptcha solve took to long and also failed reporting the job the job id {}.".format(jobID)
|
||||
raise CaptchaTimeout(
|
||||
"2Captcha: Captcha solve took to long and also failed reporting the job the job id {}.".format(jobID)
|
||||
)
|
||||
|
||||
raise reCaptchaTimeout(
|
||||
"2Captcha: reCaptcha solve took to long to execute job id {}, aborting.".format(jobID)
|
||||
raise CaptchaTimeout(
|
||||
"2Captcha: Captcha solve took to long to execute job id {}, aborting.".format(jobID)
|
||||
)
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ captchaSolvers = {}
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class reCaptcha(ABC):
|
||||
class Captcha(ABC):
|
||||
@abc.abstractmethod
|
||||
def __init__(self, name):
|
||||
captchaSolvers[name] = self
|
||||
@@ -26,10 +26,10 @@ class reCaptcha(ABC):
|
||||
if name not in captchaSolvers:
|
||||
try:
|
||||
__import__('{}.{}'.format(cls.__module__, name))
|
||||
if not isinstance(captchaSolvers.get(name), reCaptcha):
|
||||
raise ImportError('The anti reCaptcha provider was not initialized.')
|
||||
if not isinstance(captchaSolvers.get(name), Captcha):
|
||||
raise ImportError('The anti captcha provider was not initialized.')
|
||||
except ImportError:
|
||||
logging.error("Unable to load {} anti reCaptcha provider".format(name))
|
||||
logging.error("Unable to load {} anti captcha provider".format(name))
|
||||
raise
|
||||
|
||||
return captchaSolvers[name]
|
||||
@@ -37,10 +37,10 @@ class reCaptcha(ABC):
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
@abc.abstractmethod
|
||||
def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
|
||||
def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
|
||||
pass
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def solveCaptcha(self, captchaType, url, siteKey, reCaptchaParams):
|
||||
return self.getCaptchaAnswer(captchaType, url, siteKey, reCaptchaParams)
|
||||
def solveCaptcha(self, captchaType, url, siteKey, captchaParams):
|
||||
return self.getCaptchaAnswer(captchaType, url, siteKey, captchaParams)
|
||||
109
lib/cloudscraper/captcha/anticaptcha.py
Normal file
109
lib/cloudscraper/captcha/anticaptcha.py
Normal file
@@ -0,0 +1,109 @@
|
||||
from __future__ import absolute_import
|
||||
from ..exceptions import (
|
||||
CaptchaParameter,
|
||||
CaptchaTimeout,
|
||||
CaptchaAPIError
|
||||
)
|
||||
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
try:
|
||||
from python_anticaptcha import (
|
||||
AnticaptchaClient,
|
||||
NoCaptchaTaskProxylessTask,
|
||||
HCaptchaTaskProxyless,
|
||||
NoCaptchaTask,
|
||||
HCaptchaTask,
|
||||
AnticaptchaException
|
||||
)
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install/upgrade the python module 'python_anticaptcha' via "
|
||||
"pip install python-anticaptcha or https://github.com/ad-m/python-anticaptcha/"
|
||||
)
|
||||
|
||||
import sys
|
||||
|
||||
from . import Captcha
|
||||
|
||||
|
||||
class captchaSolver(Captcha):
|
||||
|
||||
def __init__(self):
|
||||
if sys.modules['python_anticaptcha'].__version__ < '0.6':
|
||||
raise ImportError(
|
||||
"Please upgrade the python module 'python_anticaptcha' via "
|
||||
"pip install -U python-anticaptcha or https://github.com/ad-m/python-anticaptcha/"
|
||||
)
|
||||
super(captchaSolver, self).__init__('anticaptcha')
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def parseProxy(self, url, user_agent):
|
||||
parsed = urlparse(url)
|
||||
|
||||
return dict(
|
||||
proxy_type=parsed.scheme,
|
||||
proxy_address=parsed.hostname,
|
||||
proxy_port=parsed.port,
|
||||
proxy_login=parsed.username,
|
||||
proxy_password=parsed.password,
|
||||
user_agent=user_agent
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
|
||||
if not captchaParams.get('api_key'):
|
||||
raise CaptchaParameter("anticaptcha: Missing api_key parameter.")
|
||||
|
||||
client = AnticaptchaClient(captchaParams.get('api_key'))
|
||||
|
||||
if captchaParams.get('proxy') and not captchaParams.get('no_proxy'):
|
||||
captchaMap = {
|
||||
'reCaptcha': NoCaptchaTask,
|
||||
'hCaptcha': HCaptchaTask
|
||||
}
|
||||
|
||||
proxy = self.parseProxy(
|
||||
captchaParams.get('proxy', {}).get('https'),
|
||||
captchaParams.get('User-Agent', '')
|
||||
)
|
||||
|
||||
task = captchaMap[captchaType](
|
||||
url,
|
||||
siteKey,
|
||||
**proxy
|
||||
)
|
||||
else:
|
||||
captchaMap = {
|
||||
'reCaptcha': NoCaptchaTaskProxylessTask,
|
||||
'hCaptcha': HCaptchaTaskProxyless
|
||||
}
|
||||
task = captchaMap[captchaType](url, siteKey)
|
||||
|
||||
if not hasattr(client, 'createTaskSmee'):
|
||||
raise NotImplementedError(
|
||||
"Please upgrade 'python_anticaptcha' via pip or download it from "
|
||||
"https://github.com/ad-m/python-anticaptcha/tree/hcaptcha"
|
||||
)
|
||||
|
||||
job = client.createTaskSmee(task, timeout=180)
|
||||
|
||||
try:
|
||||
job.join(maximum_time=180)
|
||||
except (AnticaptchaException) as e:
|
||||
raise CaptchaTimeout('{}'.format(getattr(e, 'message', e)))
|
||||
|
||||
if 'solution' in job._last_result:
|
||||
return job.get_solution_response()
|
||||
else:
|
||||
raise CaptchaAPIError('Job did not return `solution` key in payload.')
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
captchaSolver()
|
||||
@@ -48,64 +48,64 @@ class CloudflareSolveError(CloudflareException):
|
||||
"""
|
||||
|
||||
|
||||
class CloudflareReCaptchaError(CloudflareException):
|
||||
class CloudflareCaptchaError(CloudflareException):
|
||||
"""
|
||||
Raise an error for problem extracting reCaptcha paramters
|
||||
Raise an error for problem extracting Captcha paramters
|
||||
from Cloudflare payload
|
||||
"""
|
||||
|
||||
|
||||
class CloudflareReCaptchaProvider(CloudflareException):
|
||||
class CloudflareCaptchaProvider(CloudflareException):
|
||||
"""
|
||||
Raise an exception for no reCaptcha provider loaded for Cloudflare.
|
||||
Raise an exception for no Captcha provider loaded for Cloudflare.
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class reCaptchaException(Exception):
|
||||
class CaptchaException(Exception):
|
||||
"""
|
||||
Base exception class for cloudscraper reCaptcha Providers
|
||||
Base exception class for cloudscraper captcha Providers
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaServiceUnavailable(reCaptchaException):
|
||||
class CaptchaServiceUnavailable(CaptchaException):
|
||||
"""
|
||||
Raise an exception for external services that cannot be reached
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaAPIError(reCaptchaException):
|
||||
class CaptchaAPIError(CaptchaException):
|
||||
"""
|
||||
Raise an error for error from API response.
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaAccountError(reCaptchaException):
|
||||
class CaptchaAccountError(CaptchaException):
|
||||
"""
|
||||
Raise an error for reCaptcha provider account problem.
|
||||
Raise an error for captcha provider account problem.
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaTimeout(reCaptchaException):
|
||||
class CaptchaTimeout(CaptchaException):
|
||||
"""
|
||||
Raise an exception for reCaptcha provider taking too long.
|
||||
Raise an exception for captcha provider taking too long.
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaParameter(reCaptchaException):
|
||||
class CaptchaParameter(CaptchaException):
|
||||
"""
|
||||
Raise an exception for bad or missing Parameter.
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaBadJobID(reCaptchaException):
|
||||
class CaptchaBadJobID(CaptchaException):
|
||||
"""
|
||||
Raise an exception for invalid job id.
|
||||
"""
|
||||
|
||||
|
||||
class reCaptchaReportError(reCaptchaException):
|
||||
class CaptchaReportError(CaptchaException):
|
||||
"""
|
||||
Raise an error for reCaptcha provider unable to report bad solve.
|
||||
Raise an error for captcha provider unable to report bad solve.
|
||||
"""
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from ..exceptions import (
|
||||
reCaptchaParameter,
|
||||
reCaptchaTimeout,
|
||||
reCaptchaAPIError
|
||||
)
|
||||
|
||||
try:
|
||||
from python_anticaptcha import (
|
||||
AnticaptchaClient,
|
||||
NoCaptchaTaskProxylessTask,
|
||||
HCaptchaTaskProxyless,
|
||||
AnticaptchaException
|
||||
)
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install/upgrade the python module 'python_anticaptcha' via "
|
||||
"pip install python-anticaptcha or https://github.com/ad-m/python-anticaptcha/"
|
||||
)
|
||||
|
||||
import sys
|
||||
|
||||
from . import reCaptcha
|
||||
|
||||
|
||||
class captchaSolver(reCaptcha):
|
||||
|
||||
def __init__(self):
|
||||
if sys.modules['python_anticaptcha'].__version__ < '0.6':
|
||||
raise ImportError(
|
||||
"Please upgrade the python module 'python_anticaptcha' via "
|
||||
"pip install -U python-anticaptcha or https://github.com/ad-m/python-anticaptcha/"
|
||||
)
|
||||
super(captchaSolver, self).__init__('anticaptcha')
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
|
||||
if not reCaptchaParams.get('api_key'):
|
||||
raise reCaptchaParameter("anticaptcha: Missing api_key parameter.")
|
||||
|
||||
client = AnticaptchaClient(reCaptchaParams.get('api_key'))
|
||||
|
||||
if reCaptchaParams.get('proxy'):
|
||||
client.session.proxies = reCaptchaParams.get('proxies')
|
||||
|
||||
captchaMap = {
|
||||
'reCaptcha': NoCaptchaTaskProxylessTask,
|
||||
'hCaptcha': HCaptchaTaskProxyless
|
||||
}
|
||||
|
||||
task = captchaMap[captchaType](url, siteKey)
|
||||
|
||||
if not hasattr(client, 'createTaskSmee'):
|
||||
raise NotImplementedError(
|
||||
"Please upgrade 'python_anticaptcha' via pip or download it from "
|
||||
"https://github.com/ad-m/python-anticaptcha/tree/hcaptcha"
|
||||
)
|
||||
|
||||
job = client.createTaskSmee(task, timeout=180)
|
||||
|
||||
try:
|
||||
job.join(maximum_time=180)
|
||||
except (AnticaptchaException) as e:
|
||||
raise reCaptchaTimeout('{}'.format(getattr(e, 'message', e)))
|
||||
|
||||
if 'solution' in job._last_result:
|
||||
return job.get_solution_response()
|
||||
else:
|
||||
raise reCaptchaAPIError('Job did not return `solution` key in payload.')
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
captchaSolver()
|
||||
@@ -21,37 +21,29 @@ class User_Agent():
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def loadHeaders(self, user_agents, user_agent_version):
|
||||
if user_agents.get(self.browser).get('releases').get(user_agent_version).get('headers'):
|
||||
self.headers = user_agents.get(self.browser).get('releases').get(user_agent_version).get('headers')
|
||||
else:
|
||||
self.headers = user_agents.get(self.browser).get('default_headers')
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def filterAgents(self, releases):
|
||||
def filterAgents(self, user_agents):
|
||||
filtered = {}
|
||||
|
||||
for release in releases:
|
||||
if self.mobile and releases[release]['User-Agent']['mobile']:
|
||||
filtered[release] = filtered.get(release, []) + releases[release]['User-Agent']['mobile']
|
||||
if self.mobile:
|
||||
if self.platform in user_agents['mobile'] and user_agents['mobile'][self.platform]:
|
||||
filtered.update(user_agents['mobile'][self.platform])
|
||||
|
||||
if self.desktop and releases[release]['User-Agent']['desktop']:
|
||||
filtered[release] = filtered.get(release, []) + releases[release]['User-Agent']['desktop']
|
||||
if self.desktop:
|
||||
if self.platform in user_agents['desktop'] and user_agents['desktop'][self.platform]:
|
||||
filtered.update(user_agents['desktop'][self.platform])
|
||||
|
||||
return filtered
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def tryMatchCustom(self, user_agents):
|
||||
for browser in user_agents:
|
||||
for release in user_agents[browser]['releases']:
|
||||
for platform in ['mobile', 'desktop']:
|
||||
if re.search(re.escape(self.custom), ' '.join(user_agents[browser]['releases'][release]['User-Agent'][platform])):
|
||||
self.browser = browser
|
||||
self.loadHeaders(user_agents, release)
|
||||
for device_type in user_agents['user_agents']:
|
||||
for platform in user_agents['user_agents'][device_type]:
|
||||
for browser in user_agents['user_agents'][device_type][platform]:
|
||||
if re.search(re.escape(self.custom), ' '.join(user_agents['user_agents'][device_type][platform][browser])):
|
||||
self.headers = user_agents['headers'][browser]
|
||||
self.headers['User-Agent'] = self.custom
|
||||
self.cipherSuite = user_agents[self.browser].get('cipherSuite', [])
|
||||
self.cipherSuite = user_agents['cipherSuite'][browser]
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -60,13 +52,18 @@ class User_Agent():
|
||||
def loadUserAgent(self, *args, **kwargs):
|
||||
self.browser = kwargs.pop('browser', None)
|
||||
|
||||
self.platforms = ['linux', 'windows', 'darwin', 'android', 'ios']
|
||||
self.browsers = ['chrome', 'firefox']
|
||||
|
||||
if isinstance(self.browser, dict):
|
||||
self.custom = self.browser.get('custom', None)
|
||||
self.platform = self.browser.get('platform', None)
|
||||
self.desktop = self.browser.get('desktop', True)
|
||||
self.mobile = self.browser.get('mobile', True)
|
||||
self.browser = self.browser.get('browser', None)
|
||||
else:
|
||||
self.custom = kwargs.pop('custom', None)
|
||||
self.platform = kwargs.pop('platform', None)
|
||||
self.desktop = kwargs.pop('desktop', True)
|
||||
self.mobile = kwargs.pop('mobile', True)
|
||||
|
||||
@@ -94,22 +91,32 @@ class User_Agent():
|
||||
('Accept-Encoding', 'gzip, deflate, br')
|
||||
])
|
||||
else:
|
||||
if self.browser and not user_agents.get(self.browser):
|
||||
if self.browser and self.browser not in self.browsers:
|
||||
sys.tracebacklimit = 0
|
||||
raise RuntimeError('Sorry "{}" browser User-Agent was not found.'.format(self.browser))
|
||||
raise RuntimeError('Sorry "{}" browser is not valid, valid browsers are [{}].'.format(self.browser, ', '.join(self.browsers)))
|
||||
|
||||
if not self.platform:
|
||||
self.platform = random.SystemRandom().choice(self.platforms)
|
||||
|
||||
if self.platform not in self.platforms:
|
||||
sys.tracebacklimit = 0
|
||||
raise RuntimeError('Sorry the platform "{}" is not valid, valid platforms are [{}]'.format(self.platform, ', '.join(self.platforms)))
|
||||
|
||||
filteredAgents = self.filterAgents(user_agents['user_agents'])
|
||||
|
||||
if not self.browser:
|
||||
self.browser = random.SystemRandom().choice(list(user_agents))
|
||||
# has to be at least one in there...
|
||||
while not filteredAgents.get(self.browser):
|
||||
self.browser = random.SystemRandom().choice(list(filteredAgents.keys()))
|
||||
|
||||
self.cipherSuite = user_agents.get(self.browser).get('cipherSuite', [])
|
||||
if not filteredAgents[self.browser]:
|
||||
sys.tracebacklimit = 0
|
||||
raise RuntimeError('Sorry "{}" browser was not found with a platform of "{}".'.format(self.browser, self.platform))
|
||||
|
||||
filteredAgents = self.filterAgents(user_agents.get(self.browser).get('releases'))
|
||||
self.cipherSuite = user_agents['cipherSuite'][self.browser]
|
||||
self.headers = user_agents['headers'][self.browser]
|
||||
|
||||
user_agent_version = random.SystemRandom().choice(list(filteredAgents))
|
||||
|
||||
self.loadHeaders(user_agents, user_agent_version)
|
||||
|
||||
self.headers['User-Agent'] = random.SystemRandom().choice(filteredAgents[user_agent_version])
|
||||
self.headers['User-Agent'] = random.SystemRandom().choice(filteredAgents[self.browser])
|
||||
|
||||
if not kwargs.get('allow_brotli', False) and 'br' in self.headers['Accept-Encoding']:
|
||||
self.headers['Accept-Encoding'] = ','.join([
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user