Bypass CloudFlare

This commit is contained in:
Alhaziel
2019-05-30 16:31:45 +02:00
parent a634ad5965
commit e7a750564f
13 changed files with 1419 additions and 18 deletions
+11 -1
View File
@@ -98,7 +98,7 @@ load_cookies()
def downloadpage(url, post=None, headers=None, timeout=None, follow_redirects=True, cookies=True, replace_headers=False,
add_referer=False, only_headers=False, bypass_cloudflare=True, count_retries=0, count_retries_tot=5, random_headers=False, ignore_response_code=False, alfa_s=False, proxy=True, proxy_web=False, proxy_addr_forced=None,forced_proxy=None, proxy_retries=1):
add_referer=False, only_headers=False, bypass_cloudflare=True, count_retries=0, count_retries_tot=1, random_headers=False, ignore_response_code=False, alfa_s=False, proxy=True, proxy_web=False, proxy_addr_forced=None,forced_proxy=None, proxy_retries=1):
"""
Abre una url y retorna los datos obtenidos
@@ -367,8 +367,10 @@ def downloadpage(url, post=None, headers=None, timeout=None, follow_redirects=Tr
if not alfa_s:
logger.info("Could not decompress with zlib")
response["data"] = data_alt
# Anti Cloudflare
if bypass_cloudflare and count_retries < count_retries_tot:
from core.cloudflare import Cloudflare
cf = Cloudflare(response)
@@ -398,6 +400,14 @@ def downloadpage(url, post=None, headers=None, timeout=None, follow_redirects=Tr
else:
if not alfa_s:
logger.info("Unable to authorize")
logger.info("try to use CloudScrape")
try:
from lib import cloudscraper
scraper = cloudscraper.CloudScraper()
data = scraper.get(url).content
response["data"] = data
except:
logger.info("Unable to Scrape")
# Si hay errores usando un Proxy, se refrescan el Proxy y se reintenta el número de veces indicado en proxy_retries
try:
+3 -3
View File
@@ -121,7 +121,7 @@ def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data=""
itemlist = []
if not data:
data = httptools.downloadpage(item.url, headers=headers).data.replace("'", '"')
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
data = re.sub('\n|\t', ' ', data)
# replace all ' with " and eliminate newline, so we don't need to worry about
log('DATA =', data)
@@ -451,7 +451,7 @@ def typo(string, typography=''):
def match(item, patron='', patron_block='', headers='', url=''):
matches = []
url = url if url else item.url
data = httptools.downloadpage(url, headers=headers).data.replace("'", '"')
data = httptools.downloadpage(url, headers=headers, ignore_response_code=True).data.replace("'", '"')
data = re.sub(r'\n|\t|\s\s', '', data)
log('DATA= ', data)
@@ -537,7 +537,7 @@ def pagination(itemlist, item, page, perpage, function_level=1):
def server(item, data='', itemlist='', headers='', AutoPlay=True, CheckLinks=True):
if not data:
data = httptools.downloadpage(item.url, headers=headers).data
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data
if not itemlist:
itemlist = servertools.find_video_items(data=str(data))