diff --git a/lib/cloudscraper/reCaptcha/2captcha.py b/lib/cloudscraper/captcha/2captcha.py similarity index 80% rename from lib/cloudscraper/reCaptcha/2captcha.py rename to lib/cloudscraper/captcha/2captcha.py index e4789237..0c300edb 100644 --- a/lib/cloudscraper/reCaptcha/2captcha.py +++ b/lib/cloudscraper/captcha/2captcha.py @@ -1,14 +1,18 @@ from __future__ import absolute_import import requests +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse from ..exceptions import ( - reCaptchaServiceUnavailable, - reCaptchaAPIError, - reCaptchaTimeout, - reCaptchaParameter, - reCaptchaBadJobID, - reCaptchaReportError + CaptchaServiceUnavailable, + CaptchaAPIError, + CaptchaTimeout, + CaptchaParameter, + CaptchaBadJobID, + CaptchaReportError ) try: @@ -19,10 +23,10 @@ except ImportError: "https://github.com/justiniso/polling/" ) -from . import reCaptcha +from . import Captcha -class captchaSolver(reCaptcha): +class captchaSolver(Captcha): def __init__(self): super(captchaSolver, self).__init__('2captcha') @@ -34,7 +38,7 @@ class captchaSolver(reCaptcha): @staticmethod def checkErrorStatus(response, request_type): if response.status_code in [500, 502]: - raise reCaptchaServiceUnavailable('2Captcha: Server Side Error {}'.format(response.status_code)) + raise CaptchaServiceUnavailable('2Captcha: Server Side Error {}'.format(response.status_code)) errors = { 'in.php': { @@ -81,7 +85,7 @@ class captchaSolver(reCaptcha): } if response.json().get('status') == 0 and response.json().get('request') in errors.get(request_type): - raise reCaptchaAPIError( + raise CaptchaAPIError( '{} {}'.format( response.json().get('request'), errors.get(request_type).get(response.json().get('request')) @@ -92,8 +96,8 @@ class captchaSolver(reCaptcha): def reportJob(self, jobID): if not jobID: - raise reCaptchaBadJobID( - "2Captcha: Error bad job id to request reCaptcha." + raise CaptchaBadJobID( + "2Captcha: Error bad job id to request Captcha." ) def _checkRequest(response): @@ -123,15 +127,15 @@ class captchaSolver(reCaptcha): if response: return True else: - raise reCaptchaReportError( - "2Captcha: Error - Failed to report bad reCaptcha solve." + raise CaptchaReportError( + "2Captcha: Error - Failed to report bad Captcha solve." ) # ------------------------------------------------------------------------------- # def requestJob(self, jobID): if not jobID: - raise reCaptchaBadJobID("2Captcha: Error bad job id to request reCaptcha.") + raise CaptchaBadJobID("2Captcha: Error bad job id to request Captcha.") def _checkRequest(response): if response.ok and response.json().get('status') == 1: @@ -160,8 +164,8 @@ class captchaSolver(reCaptcha): if response: return response.json().get('request') else: - raise reCaptchaTimeout( - "2Captcha: Error failed to solve reCaptcha." + raise CaptchaTimeout( + "2Captcha: Error failed to solve Captcha." ) # ------------------------------------------------------------------------------- # @@ -192,6 +196,14 @@ class captchaSolver(reCaptcha): } ) + if self.proxy: + data.update( + { + 'proxy': self.proxy, + 'proxytype': self.proxyType + } + ) + response = polling.poll( lambda: self.session.post( '{}/in.php'.format(self.host), @@ -207,24 +219,35 @@ class captchaSolver(reCaptcha): if response: return response.json().get('request') else: - raise reCaptchaBadJobID( + raise CaptchaBadJobID( '2Captcha: Error no job id was returned.' ) # ------------------------------------------------------------------------------- # - def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams): + def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams): jobID = None - if not reCaptchaParams.get('api_key'): - raise reCaptchaParameter( + if not captchaParams.get('api_key'): + raise CaptchaParameter( "2Captcha: Missing api_key parameter." ) - self.api_key = reCaptchaParams.get('api_key') + self.api_key = captchaParams.get('api_key') - if reCaptchaParams.get('proxy'): - self.session.proxies = reCaptchaParams.get('proxies') + if captchaParams.get('proxy') and not captchaParams.get('no_proxy'): + hostParsed = urlparse(captchaParams.get('proxy', {}).get('https')) + + if not hostParsed.scheme: + raise CaptchaParameter('Cannot parse proxy correctly, bad scheme') + + if not hostParsed.netloc: + raise CaptchaParameter('Cannot parse proxy correctly, bad netloc') + + self.proxyType = hostParsed.scheme + self.proxy = hostParsed.netloc + else: + self.proxy = None try: jobID = self.requestSolve(captchaType, url, siteKey) @@ -234,12 +257,12 @@ class captchaSolver(reCaptcha): if jobID: self.reportJob(jobID) except polling.TimeoutException: - raise reCaptchaTimeout( - "2Captcha: reCaptcha solve took to long and also failed reporting the job the job id {}.".format(jobID) + raise CaptchaTimeout( + "2Captcha: Captcha solve took to long and also failed reporting the job the job id {}.".format(jobID) ) - raise reCaptchaTimeout( - "2Captcha: reCaptcha solve took to long to execute job id {}, aborting.".format(jobID) + raise CaptchaTimeout( + "2Captcha: Captcha solve took to long to execute job id {}, aborting.".format(jobID) ) diff --git a/lib/cloudscraper/reCaptcha/9kw.py b/lib/cloudscraper/captcha/9kw.py similarity index 100% rename from lib/cloudscraper/reCaptcha/9kw.py rename to lib/cloudscraper/captcha/9kw.py diff --git a/lib/cloudscraper/reCaptcha/__init__.py b/lib/cloudscraper/captcha/__init__.py similarity index 70% rename from lib/cloudscraper/reCaptcha/__init__.py rename to lib/cloudscraper/captcha/__init__.py index f23d2601..0e4f8f00 100644 --- a/lib/cloudscraper/reCaptcha/__init__.py +++ b/lib/cloudscraper/captcha/__init__.py @@ -14,7 +14,7 @@ captchaSolvers = {} # ------------------------------------------------------------------------------- # -class reCaptcha(ABC): +class Captcha(ABC): @abc.abstractmethod def __init__(self, name): captchaSolvers[name] = self @@ -26,10 +26,10 @@ class reCaptcha(ABC): if name not in captchaSolvers: try: __import__('{}.{}'.format(cls.__module__, name)) - if not isinstance(captchaSolvers.get(name), reCaptcha): - raise ImportError('The anti reCaptcha provider was not initialized.') + if not isinstance(captchaSolvers.get(name), Captcha): + raise ImportError('The anti captcha provider was not initialized.') except ImportError: - logging.error("Unable to load {} anti reCaptcha provider".format(name)) + logging.error("Unable to load {} anti captcha provider".format(name)) raise return captchaSolvers[name] @@ -37,10 +37,10 @@ class reCaptcha(ABC): # ------------------------------------------------------------------------------- # @abc.abstractmethod - def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams): + def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams): pass # ------------------------------------------------------------------------------- # - def solveCaptcha(self, captchaType, url, siteKey, reCaptchaParams): - return self.getCaptchaAnswer(captchaType, url, siteKey, reCaptchaParams) + def solveCaptcha(self, captchaType, url, siteKey, captchaParams): + return self.getCaptchaAnswer(captchaType, url, siteKey, captchaParams) diff --git a/lib/cloudscraper/captcha/anticaptcha.py b/lib/cloudscraper/captcha/anticaptcha.py new file mode 100644 index 00000000..ec453b9c --- /dev/null +++ b/lib/cloudscraper/captcha/anticaptcha.py @@ -0,0 +1,109 @@ +from __future__ import absolute_import +from ..exceptions import ( + CaptchaParameter, + CaptchaTimeout, + CaptchaAPIError +) + +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse + +try: + from python_anticaptcha import ( + AnticaptchaClient, + NoCaptchaTaskProxylessTask, + HCaptchaTaskProxyless, + NoCaptchaTask, + HCaptchaTask, + AnticaptchaException + ) +except ImportError: + raise ImportError( + "Please install/upgrade the python module 'python_anticaptcha' via " + "pip install python-anticaptcha or https://github.com/ad-m/python-anticaptcha/" + ) + +import sys + +from . import Captcha + + +class captchaSolver(Captcha): + + def __init__(self): + if sys.modules['python_anticaptcha'].__version__ < '0.6': + raise ImportError( + "Please upgrade the python module 'python_anticaptcha' via " + "pip install -U python-anticaptcha or https://github.com/ad-m/python-anticaptcha/" + ) + super(captchaSolver, self).__init__('anticaptcha') + + # ------------------------------------------------------------------------------- # + + def parseProxy(self, url, user_agent): + parsed = urlparse(url) + + return dict( + proxy_type=parsed.scheme, + proxy_address=parsed.hostname, + proxy_port=parsed.port, + proxy_login=parsed.username, + proxy_password=parsed.password, + user_agent=user_agent + ) + + # ------------------------------------------------------------------------------- # + + def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams): + if not captchaParams.get('api_key'): + raise CaptchaParameter("anticaptcha: Missing api_key parameter.") + + client = AnticaptchaClient(captchaParams.get('api_key')) + + if captchaParams.get('proxy') and not captchaParams.get('no_proxy'): + captchaMap = { + 'reCaptcha': NoCaptchaTask, + 'hCaptcha': HCaptchaTask + } + + proxy = self.parseProxy( + captchaParams.get('proxy', {}).get('https'), + captchaParams.get('User-Agent', '') + ) + + task = captchaMap[captchaType]( + url, + siteKey, + **proxy + ) + else: + captchaMap = { + 'reCaptcha': NoCaptchaTaskProxylessTask, + 'hCaptcha': HCaptchaTaskProxyless + } + task = captchaMap[captchaType](url, siteKey) + + if not hasattr(client, 'createTaskSmee'): + raise NotImplementedError( + "Please upgrade 'python_anticaptcha' via pip or download it from " + "https://github.com/ad-m/python-anticaptcha/tree/hcaptcha" + ) + + job = client.createTaskSmee(task, timeout=180) + + try: + job.join(maximum_time=180) + except (AnticaptchaException) as e: + raise CaptchaTimeout('{}'.format(getattr(e, 'message', e))) + + if 'solution' in job._last_result: + return job.get_solution_response() + else: + raise CaptchaAPIError('Job did not return `solution` key in payload.') + + +# ------------------------------------------------------------------------------- # + +captchaSolver() diff --git a/lib/cloudscraper/reCaptcha/deathbycaptcha.py b/lib/cloudscraper/captcha/deathbycaptcha.py similarity index 100% rename from lib/cloudscraper/reCaptcha/deathbycaptcha.py rename to lib/cloudscraper/captcha/deathbycaptcha.py diff --git a/lib/cloudscraper/reCaptcha/anticaptcha.py b/lib/cloudscraper/reCaptcha/anticaptcha.py deleted file mode 100644 index b25e4e14..00000000 --- a/lib/cloudscraper/reCaptcha/anticaptcha.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import absolute_import -from ..exceptions import ( - reCaptchaParameter, - reCaptchaTimeout, - reCaptchaAPIError -) - -try: - from python_anticaptcha import ( - AnticaptchaClient, - NoCaptchaTaskProxylessTask, - HCaptchaTaskProxyless, - AnticaptchaException - ) -except ImportError: - raise ImportError( - "Please install/upgrade the python module 'python_anticaptcha' via " - "pip install python-anticaptcha or https://github.com/ad-m/python-anticaptcha/" - ) - -import sys - -from . import reCaptcha - - -class captchaSolver(reCaptcha): - - def __init__(self): - if sys.modules['python_anticaptcha'].__version__ < '0.6': - raise ImportError( - "Please upgrade the python module 'python_anticaptcha' via " - "pip install -U python-anticaptcha or https://github.com/ad-m/python-anticaptcha/" - ) - super(captchaSolver, self).__init__('anticaptcha') - - # ------------------------------------------------------------------------------- # - - def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams): - if not reCaptchaParams.get('api_key'): - raise reCaptchaParameter("anticaptcha: Missing api_key parameter.") - - client = AnticaptchaClient(reCaptchaParams.get('api_key')) - - if reCaptchaParams.get('proxy'): - client.session.proxies = reCaptchaParams.get('proxies') - - captchaMap = { - 'reCaptcha': NoCaptchaTaskProxylessTask, - 'hCaptcha': HCaptchaTaskProxyless - } - - task = captchaMap[captchaType](url, siteKey) - - if not hasattr(client, 'createTaskSmee'): - raise NotImplementedError( - "Please upgrade 'python_anticaptcha' via pip or download it from " - "https://github.com/ad-m/python-anticaptcha/tree/hcaptcha" - ) - - job = client.createTaskSmee(task, timeout=180) - - try: - job.join(maximum_time=180) - except (AnticaptchaException) as e: - raise reCaptchaTimeout('{}'.format(getattr(e, 'message', e))) - - if 'solution' in job._last_result: - return job.get_solution_response() - else: - raise reCaptchaAPIError('Job did not return `solution` key in payload.') - - -# ------------------------------------------------------------------------------- # - -captchaSolver()