Aggiornato cloudscraper
This commit is contained in:
@@ -1,8 +1,26 @@
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
import logging
|
||||
import re
|
||||
import requests
|
||||
import sys
|
||||
import ssl
|
||||
import requests
|
||||
|
||||
from collections import OrderedDict
|
||||
from copy import deepcopy
|
||||
|
||||
from requests.adapters import HTTPAdapter
|
||||
from requests.sessions import Session
|
||||
from requests_toolbelt.utils import dump
|
||||
|
||||
from time import sleep
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
try:
|
||||
import brotli
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import copyreg
|
||||
@@ -17,12 +35,12 @@ except ImportError:
|
||||
else:
|
||||
from html.parser import HTMLParser
|
||||
|
||||
from copy import deepcopy
|
||||
from time import sleep
|
||||
from collections import OrderedDict
|
||||
try:
|
||||
from urlparse import urlparse, urljoin
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
from requests.sessions import Session
|
||||
from requests.adapters import HTTPAdapter
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
from .exceptions import (
|
||||
CloudflareLoopProtection,
|
||||
@@ -37,25 +55,9 @@ from .interpreters import JavaScriptInterpreter
|
||||
from .reCaptcha import reCaptcha
|
||||
from .user_agent import User_Agent
|
||||
|
||||
try:
|
||||
from requests_toolbelt.utils import dump
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import brotli
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
from urlparse import urlparse, urljoin
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
__version__ = '1.2.36'
|
||||
__version__ = '1.2.40'
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
@@ -107,6 +109,9 @@ class CloudScraper(Session):
|
||||
self.ssl_context = kwargs.pop('ssl_context', None)
|
||||
self.interpreter = kwargs.pop('interpreter', 'native')
|
||||
self.recaptcha = kwargs.pop('recaptcha', {})
|
||||
self.requestPreHook = kwargs.pop('requestPreHook', None)
|
||||
self.requestPostHook = kwargs.pop('requestPostHook', None)
|
||||
|
||||
self.allow_brotli = kwargs.pop(
|
||||
'allow_brotli',
|
||||
True if 'brotli' in sys.modules.keys() else False
|
||||
@@ -213,19 +218,46 @@ class CloudScraper(Session):
|
||||
if kwargs.get('proxies') and kwargs.get('proxies') != self.proxies:
|
||||
self.proxies = kwargs.get('proxies')
|
||||
|
||||
resp = self.decodeBrotli(
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Pre-Hook the request via user defined function.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
if self.requestPreHook:
|
||||
(method, url, args, kwargs) = self.requestPreHook(
|
||||
self,
|
||||
method,
|
||||
url,
|
||||
*args,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Make the request via requests.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
response = self.decodeBrotli(
|
||||
super(CloudScraper, self).request(method, url, *args, **kwargs)
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Debug request
|
||||
# Debug the request via the Response object.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
if self.debug:
|
||||
self.debugRequest(resp)
|
||||
self.debugRequest(response)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Post-Hook the request aka Post-Hook the response via user defined function.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
if self.requestPostHook:
|
||||
response = self.requestPostHook(self, response)
|
||||
|
||||
if self.debug:
|
||||
self.debugRequest(response)
|
||||
|
||||
# Check if Cloudflare anti-bot is on
|
||||
if self.is_Challenge_Request(resp):
|
||||
if self.is_Challenge_Request(response):
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Try to solve the challenge and send it back
|
||||
# ------------------------------------------------------------------------------- #
|
||||
@@ -239,12 +271,12 @@ class CloudScraper(Session):
|
||||
|
||||
self._solveDepthCnt += 1
|
||||
|
||||
resp = self.Challenge_Response(resp, **kwargs)
|
||||
response = self.Challenge_Response(response, **kwargs)
|
||||
else:
|
||||
if not resp.is_redirect and resp.status_code not in [429, 503]:
|
||||
if not response.is_redirect and response.status_code not in [429, 503]:
|
||||
self._solveDepthCnt = 0
|
||||
|
||||
return resp
|
||||
return response
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# check if the response contains a valid Cloudflare challenge
|
||||
@@ -259,7 +291,7 @@ class CloudScraper(Session):
|
||||
and re.search(
|
||||
r'<form .*?="challenge-form" action="/.*?__cf_chl_jschl_tk__=\S+"',
|
||||
resp.text,
|
||||
re.M | re.DOTALL
|
||||
re.M | re.S
|
||||
)
|
||||
)
|
||||
except AttributeError:
|
||||
@@ -278,9 +310,9 @@ class CloudScraper(Session):
|
||||
resp.headers.get('Server', '').startswith('cloudflare')
|
||||
and resp.status_code in [429, 503]
|
||||
and re.search(
|
||||
r'cpo.src="/cdn-cgi/challenge-platform/orchestrate/jsch/v1"',
|
||||
r'cpo.src\s*=\s*"/cdn-cgi/challenge-platform/orchestrate/jsch/v1"',
|
||||
resp.text,
|
||||
re.M | re.DOTALL
|
||||
re.M | re.S
|
||||
)
|
||||
)
|
||||
except AttributeError:
|
||||
@@ -375,7 +407,7 @@ class CloudScraper(Session):
|
||||
)
|
||||
|
||||
payload = OrderedDict()
|
||||
for challengeParam in re.findall(r'<input\s(.*?)>', formPayload['form']):
|
||||
for challengeParam in re.findall(r'^\s*<input\s(.*?)/>', formPayload['form'], re.M | re.S):
|
||||
inputPayload = dict(re.findall(r'(\S+)="(\S+)"', challengeParam))
|
||||
if inputPayload.get('name') in ['r', 'jschl_vc', 'pass']:
|
||||
payload.update({inputPayload['name']: inputPayload['value']})
|
||||
|
||||
@@ -49,7 +49,7 @@ class JavaScriptInterpreter(ABC):
|
||||
|
||||
def solveChallenge(self, body, domain):
|
||||
try:
|
||||
return float(self.eval(body, domain))
|
||||
return '{0:.10f}'.format(float(self.eval(body, domain)))
|
||||
except Exception:
|
||||
raise CloudflareSolveError(
|
||||
'Error trying to solve Cloudflare IUAM Javascript, they may have changed their technique.'
|
||||
|
||||
@@ -9,32 +9,38 @@ def template(body, domain):
|
||||
|
||||
try:
|
||||
js = re.search(
|
||||
r'setTimeout\(function\(\){\s+(.*?a\.value = \S+)',
|
||||
r'setTimeout\(function\(\){\s+(.*?a\.value\s*=\s*\S+toFixed\(10\);)',
|
||||
body,
|
||||
re.M | re.S
|
||||
).group(1)
|
||||
except Exception:
|
||||
raise ValueError('Unable to identify Cloudflare IUAM Javascript on website. {}'.format(BUG_REPORT))
|
||||
|
||||
jsEnv = '''
|
||||
String.prototype.italics=function(str) {{return "<i>" + this + "</i>";}};
|
||||
jsEnv = '''String.prototype.italics=function(str) {{return "<i>" + this + "</i>";}};
|
||||
var subVars= {{{subVars}}};
|
||||
var document = {{
|
||||
createElement: function () {{
|
||||
return {{ firstChild: {{ href: "https://{domain}/" }} }}
|
||||
}},
|
||||
getElementById: function () {{
|
||||
return {{"innerHTML": "{innerHTML}"}};
|
||||
getElementById: function (str) {{
|
||||
return {{"innerHTML": subVars[str]}};
|
||||
}}
|
||||
}};
|
||||
'''
|
||||
|
||||
try:
|
||||
innerHTML = re.search(
|
||||
r'<div(?: [^<>]*)? id="([^<>]*?)">([^<>]*?)</div>',
|
||||
body,
|
||||
re.MULTILINE | re.DOTALL
|
||||
js = js.replace(
|
||||
r"(setInterval(function(){}, 100),t.match(/https?:\/\//)[0]);",
|
||||
r"t.match(/https?:\/\//)[0];"
|
||||
)
|
||||
innerHTML = innerHTML.group(2) if innerHTML else ''
|
||||
|
||||
k = re.search(r" k\s*=\s*'(?P<k>\S+)';", body).group('k')
|
||||
r = re.compile(r'<div id="{}(?P<id>\d+)">\s*(?P<jsfuck>[^<>]*)</div>'.format(k))
|
||||
|
||||
subVars = ''
|
||||
for m in r.finditer(body):
|
||||
subVars = '{}\n\t\t{}{}: {},\n'.format(subVars, k, m.group('id'), m.group('jsfuck'))
|
||||
subVars = subVars[:-2]
|
||||
|
||||
except: # noqa
|
||||
logging.error('Error extracting Cloudflare IUAM Javascript. {}'.format(BUG_REPORT))
|
||||
@@ -46,7 +52,7 @@ def template(body, domain):
|
||||
' ',
|
||||
jsEnv.format(
|
||||
domain=domain,
|
||||
innerHTML=innerHTML
|
||||
subVars=subVars
|
||||
),
|
||||
re.MULTILINE | re.DOTALL
|
||||
),
|
||||
|
||||
@@ -100,8 +100,8 @@ class ChallengeInterpreter(JavaScriptInterpreter):
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
def flatten(l):
|
||||
return sum(map(flatten, l), []) if isinstance(l, list) else [l]
|
||||
def flatten(lists):
    """Flatten arbitrarily nested lists into one flat list.

    Only ``list`` instances are descended into; every other value
    (tuples and strings included) is kept as a single leaf.

    Args:
        lists: A possibly nested list, or a single non-list value.

    Returns:
        A new list holding the leaves in depth-first, left-to-right order.
        A non-list argument is returned wrapped in a one-element list.
    """
    if not isinstance(lists, list):
        return [lists]

    flat = []
    # Walk with an explicit stack of iterators rather than recursion plus
    # sum(..., []): that form re-copies the accumulated list at every level
    # (quadratic) and fails with RecursionError on deeply nested input.
    pending = [iter(lists)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                # Descend; the parent iterator resumes after this child.
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # Current level exhausted; return to the parent level.
            pending.pop()
    return flat
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
@@ -114,6 +114,7 @@ class ChallengeInterpreter(JavaScriptInterpreter):
|
||||
# Hackery Parser for Math
|
||||
stack = []
|
||||
bstack = []
|
||||
|
||||
for i in flatten(pyparsing.nestedExpr().parseString(jsFuck).asList()):
|
||||
if i == '+':
|
||||
stack.append(bstack)
|
||||
@@ -152,13 +153,35 @@ class ChallengeInterpreter(JavaScriptInterpreter):
|
||||
try:
|
||||
jsfuckChallenge = re.search(
|
||||
r"setTimeout\(function\(\){\s+var.*?f,\s*(?P<variable>\w+).*?:(?P<init>\S+)};"
|
||||
r".*?\('challenge-form'\);\s+;(?P<challenge>.*?a\.value)"
|
||||
r"(?:.*id=\"cf-dn-.*?>(?P<k>\S+)<)?",
|
||||
r".*?\('challenge-form'\);.*?;(?P<challenge>.*?a\.value)\s*=\s*\S+\.toFixed\(10\);",
|
||||
body,
|
||||
re.DOTALL | re.MULTILINE
|
||||
).groupdict()
|
||||
except AttributeError:
|
||||
raise CloudflareSolveError('There was an issue extracting the Cloudflare challenge.')
|
||||
raise CloudflareSolveError('There was an issue extracting "jsfuckChallenge" from the Cloudflare challenge.')
|
||||
|
||||
kJSFUCK = re.search(r'(;|)\s*k.=(?P<kJSFUCK>\S+);', jsfuckChallenge['challenge'], re.S | re.M)
|
||||
if kJSFUCK:
|
||||
try:
|
||||
kJSFUCK = jsfuckToNumber(kJSFUCK.group('kJSFUCK'))
|
||||
except IndexError:
|
||||
raise CloudflareSolveError('There was an issue extracting "kJSFUCK" from the Cloudflare challenge.')
|
||||
|
||||
try:
|
||||
kID = re.search(r"\s*k\s*=\s*'(?P<kID>\S+)';", body).group('kID')
|
||||
except IndexError:
|
||||
raise CloudflareSolveError('There was an issue extracting "kID" from the Cloudflare challenge.')
|
||||
|
||||
try:
|
||||
r = re.compile(r'<div id="{}(?P<id>\d+)">\s*(?P<jsfuck>[^<>]*)</div>'.format(kID))
|
||||
|
||||
kValues = {}
|
||||
for m in r.finditer(body):
|
||||
kValues[int(m.group('id'))] = m.group('jsfuck')
|
||||
|
||||
jsfuckChallenge['k'] = kValues[kJSFUCK]
|
||||
except (AttributeError, IndexError):
|
||||
raise CloudflareSolveError('There was an issue extracting "kValues" from the Cloudflare challenge.')
|
||||
|
||||
jsfuckChallenge['challenge'] = re.finditer(
|
||||
r'{}.*?([+\-*/])=(.*?);(?=a\.value|{})'.format(
|
||||
@@ -193,8 +216,8 @@ class ChallengeInterpreter(JavaScriptInterpreter):
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
if not jsfuckChallenge['k'] and '+ t.length' in body:
|
||||
jschl_answer += len(domain)
|
||||
# if not jsfuckChallenge['k'] and '+ t.length' in body:
|
||||
# jschl_answer += len(domain)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
Reference in New Issue
Block a user