Aggiornato cloudscraper
This commit is contained in:
@@ -1,8 +1,26 @@
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
import logging
|
||||
import re
|
||||
import requests
|
||||
import sys
|
||||
import ssl
|
||||
import requests
|
||||
|
||||
from collections import OrderedDict
|
||||
from copy import deepcopy
|
||||
|
||||
from requests.adapters import HTTPAdapter
|
||||
from requests.sessions import Session
|
||||
from requests_toolbelt.utils import dump
|
||||
|
||||
from time import sleep
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
try:
|
||||
import brotli
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import copyreg
|
||||
@@ -17,12 +35,12 @@ except ImportError:
|
||||
else:
|
||||
from html.parser import HTMLParser
|
||||
|
||||
from copy import deepcopy
|
||||
from time import sleep
|
||||
from collections import OrderedDict
|
||||
try:
|
||||
from urlparse import urlparse, urljoin
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
from requests.sessions import Session
|
||||
from requests.adapters import HTTPAdapter
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
from .exceptions import (
|
||||
CloudflareLoopProtection,
|
||||
@@ -37,25 +55,9 @@ from .interpreters import JavaScriptInterpreter
|
||||
from .reCaptcha import reCaptcha
|
||||
from .user_agent import User_Agent
|
||||
|
||||
try:
|
||||
from requests_toolbelt.utils import dump
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import brotli
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
from urlparse import urlparse, urljoin
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
__version__ = '1.2.36'
|
||||
__version__ = '1.2.40'
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
@@ -107,6 +109,9 @@ class CloudScraper(Session):
|
||||
self.ssl_context = kwargs.pop('ssl_context', None)
|
||||
self.interpreter = kwargs.pop('interpreter', 'native')
|
||||
self.recaptcha = kwargs.pop('recaptcha', {})
|
||||
self.requestPreHook = kwargs.pop('requestPreHook', None)
|
||||
self.requestPostHook = kwargs.pop('requestPostHook', None)
|
||||
|
||||
self.allow_brotli = kwargs.pop(
|
||||
'allow_brotli',
|
||||
True if 'brotli' in sys.modules.keys() else False
|
||||
@@ -213,19 +218,46 @@ class CloudScraper(Session):
|
||||
if kwargs.get('proxies') and kwargs.get('proxies') != self.proxies:
|
||||
self.proxies = kwargs.get('proxies')
|
||||
|
||||
resp = self.decodeBrotli(
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Pre-Hook the request via user defined function.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
if self.requestPreHook:
|
||||
(method, url, args, kwargs) = self.requestPreHook(
|
||||
self,
|
||||
method,
|
||||
url,
|
||||
*args,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Make the request via requests.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
response = self.decodeBrotli(
|
||||
super(CloudScraper, self).request(method, url, *args, **kwargs)
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Debug request
|
||||
# Debug the request via the Response object.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
if self.debug:
|
||||
self.debugRequest(resp)
|
||||
self.debugRequest(response)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Post-Hook the request aka Post-Hook the response via user defined function.
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
if self.requestPostHook:
|
||||
response = self.requestPostHook(self, response)
|
||||
|
||||
if self.debug:
|
||||
self.debugRequest(response)
|
||||
|
||||
# Check if Cloudflare anti-bot is on
|
||||
if self.is_Challenge_Request(resp):
|
||||
if self.is_Challenge_Request(response):
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# Try to solve the challenge and send it back
|
||||
# ------------------------------------------------------------------------------- #
|
||||
@@ -239,12 +271,12 @@ class CloudScraper(Session):
|
||||
|
||||
self._solveDepthCnt += 1
|
||||
|
||||
resp = self.Challenge_Response(resp, **kwargs)
|
||||
response = self.Challenge_Response(response, **kwargs)
|
||||
else:
|
||||
if not resp.is_redirect and resp.status_code not in [429, 503]:
|
||||
if not response.is_redirect and response.status_code not in [429, 503]:
|
||||
self._solveDepthCnt = 0
|
||||
|
||||
return resp
|
||||
return response
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
# check if the response contains a valid Cloudflare challenge
|
||||
@@ -259,7 +291,7 @@ class CloudScraper(Session):
|
||||
and re.search(
|
||||
r'<form .*?="challenge-form" action="/.*?__cf_chl_jschl_tk__=\S+"',
|
||||
resp.text,
|
||||
re.M | re.DOTALL
|
||||
re.M | re.S
|
||||
)
|
||||
)
|
||||
except AttributeError:
|
||||
@@ -278,9 +310,9 @@ class CloudScraper(Session):
|
||||
resp.headers.get('Server', '').startswith('cloudflare')
|
||||
and resp.status_code in [429, 503]
|
||||
and re.search(
|
||||
r'cpo.src="/cdn-cgi/challenge-platform/orchestrate/jsch/v1"',
|
||||
r'cpo.src\s*=\s*"/cdn-cgi/challenge-platform/orchestrate/jsch/v1"',
|
||||
resp.text,
|
||||
re.M | re.DOTALL
|
||||
re.M | re.S
|
||||
)
|
||||
)
|
||||
except AttributeError:
|
||||
@@ -375,7 +407,7 @@ class CloudScraper(Session):
|
||||
)
|
||||
|
||||
payload = OrderedDict()
|
||||
for challengeParam in re.findall(r'<input\s(.*?)>', formPayload['form']):
|
||||
for challengeParam in re.findall(r'^\s*<input\s(.*?)/>', formPayload['form'], re.M | re.S):
|
||||
inputPayload = dict(re.findall(r'(\S+)="(\S+)"', challengeParam))
|
||||
if inputPayload.get('name') in ['r', 'jschl_vc', 'pass']:
|
||||
payload.update({inputPayload['name']: inputPayload['value']})
|
||||
|
||||
Reference in New Issue
Block a user