Nuovo metodo anti CF -> fix ilgenio
This commit is contained in:
+1
-1
@@ -28,7 +28,7 @@
|
|||||||
"guardaserieclick": "https://www.guardaserie.date",
|
"guardaserieclick": "https://www.guardaserie.date",
|
||||||
"guardaserieicu": "https://guardaserie.rocks",
|
"guardaserieicu": "https://guardaserie.rocks",
|
||||||
"hd4me": "https://hd4me.net",
|
"hd4me": "https://hd4me.net",
|
||||||
"ilgeniodellostreaming": "https://ilgeniodellostreaming.pet",
|
"ilgeniodellostreaming": "https://ilgeniodellostreaming.cat",
|
||||||
"ilgeniodellostreaming_cam": "https://ilgeniodellostreaming.gold",
|
"ilgeniodellostreaming_cam": "https://ilgeniodellostreaming.gold",
|
||||||
"italiaserie": "https://italiaserie.best",
|
"italiaserie": "https://italiaserie.best",
|
||||||
"mondoserietv": "https://mondoserietv.fun",
|
"mondoserietv": "https://mondoserietv.fun",
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ def mainlist(item):
|
|||||||
@support.scrape
|
@support.scrape
|
||||||
def peliculas(item):
|
def peliculas(item):
|
||||||
info()
|
info()
|
||||||
# debug = True
|
# debugBlock = True
|
||||||
|
|
||||||
if item.args == 'search':
|
if item.args == 'search':
|
||||||
patronBlock = r'<div class="search-page">(?P<block>.*?)<footer class="main">'
|
patronBlock = r'<div class="search-page">(?P<block>.*?)<footer class="main">'
|
||||||
@@ -104,7 +104,7 @@ def episodios(item):
|
|||||||
patron = r'<a href="(?P<url>[^"]+)"><img src="(?P<thumb>[^"]+)">.*?'\
|
patron = r'<a href="(?P<url>[^"]+)"><img src="(?P<thumb>[^"]+)">.*?'\
|
||||||
'<div class="numerando">(?P<episode>[^<]+).*?<div class="episodiotitle">'\
|
'<div class="numerando">(?P<episode>[^<]+).*?<div class="episodiotitle">'\
|
||||||
'[^>]+>(?P<title>[^<]+)<\/a>'
|
'[^>]+>(?P<title>[^<]+)<\/a>'
|
||||||
# debug = True
|
# debug = True
|
||||||
return locals()
|
return locals()
|
||||||
|
|
||||||
@support.scrape
|
@support.scrape
|
||||||
@@ -128,25 +128,16 @@ def search(item, text):
|
|||||||
info(text)
|
info(text)
|
||||||
itemlist = []
|
itemlist = []
|
||||||
text = text.replace(' ', '+')
|
text = text.replace(' ', '+')
|
||||||
item.url = host + "/wp-json/wp/v2/search?per_page=100&search=" + text
|
item.url = host + "?s=" + text
|
||||||
results = support.httptools.downloadpage(item.url).json
|
# try:
|
||||||
for r in results:
|
item.args = 'search'
|
||||||
title = r['title']
|
return peliculas(item)
|
||||||
longtitle = support.typo(title, 'bold')
|
# except:
|
||||||
if '[sub-ita]' in title.lower():
|
# import sys
|
||||||
longtitle += support.typo('Sub-ITA', '_ [] color kod')
|
# for line in sys.exc_info():
|
||||||
title = title.split('[')[0]
|
# info("%s" % line)
|
||||||
itemlist.append(item.clone(action='findvideos' if r['subtype'] == 'movies' else 'episodios',
|
|
||||||
title=longtitle,
|
return []
|
||||||
fulltitle=title,
|
|
||||||
show=title,
|
|
||||||
contentTitle=title,
|
|
||||||
contentSerieName=title,
|
|
||||||
contentType='movie' if r['subtype'] == 'movies' else 'tvshow',
|
|
||||||
url=r['url']))
|
|
||||||
# support.dbg()
|
|
||||||
support.tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
|
|
||||||
return itemlist
|
|
||||||
|
|
||||||
def newest(categoria):
|
def newest(categoria):
|
||||||
info(categoria)
|
info(categoria)
|
||||||
|
|||||||
+20
-42
@@ -58,7 +58,6 @@ HTTPTOOLS_DEFAULT_RANDOM_HEADERS = False
|
|||||||
# with open(CF_LIST_PATH, "rb") as CF_File:
|
# with open(CF_LIST_PATH, "rb") as CF_File:
|
||||||
# CF_LIST = CF_File.read().splitlines()
|
# CF_LIST = CF_File.read().splitlines()
|
||||||
|
|
||||||
FORCE_CLOUDSCRAPER_LIST = ['akvideo.stream']
|
|
||||||
|
|
||||||
def get_user_agent():
|
def get_user_agent():
|
||||||
# Returns the global user agent to be used when necessary for the url.
|
# Returns the global user agent to be used when necessary for the url.
|
||||||
@@ -269,25 +268,12 @@ def downloadpage(url, **opt):
|
|||||||
"""
|
"""
|
||||||
url = scrapertools.unescape(url)
|
url = scrapertools.unescape(url)
|
||||||
domain = urlparse.urlparse(url).netloc
|
domain = urlparse.urlparse(url).netloc
|
||||||
# global CF_LIST
|
from lib import requests
|
||||||
CF = False
|
session = requests.session()
|
||||||
|
|
||||||
if domain in FORCE_CLOUDSCRAPER_LIST or opt.get('cf', False):
|
|
||||||
from lib import cloudscraper
|
|
||||||
session = cloudscraper.create_scraper()
|
|
||||||
CF = True
|
|
||||||
else:
|
|
||||||
from lib import requests
|
|
||||||
session = requests.session()
|
|
||||||
|
|
||||||
# if domain in CF_LIST or opt.get('CF', False):
|
|
||||||
if opt.get('CF', False):
|
|
||||||
url = 'https://web.archive.org/save/' + url
|
|
||||||
CF = True
|
|
||||||
|
|
||||||
if config.get_setting('resolver_dns') and not opt.get('use_requests', False):
|
if config.get_setting('resolver_dns') and not opt.get('use_requests', False):
|
||||||
from core import resolverdns
|
from core import resolverdns
|
||||||
session.mount('https://', resolverdns.CipherSuiteAdapter(domain, CF))
|
session.mount('https://', resolverdns.CipherSuiteAdapter(domain))
|
||||||
|
|
||||||
req_headers = default_headers.copy()
|
req_headers = default_headers.copy()
|
||||||
|
|
||||||
@@ -401,30 +387,30 @@ def downloadpage(url, **opt):
|
|||||||
return type('HTTPResponse', (), response)
|
return type('HTTPResponse', (), response)
|
||||||
|
|
||||||
response_code = req.status_code
|
response_code = req.status_code
|
||||||
response['data'] = req.content if req.content else ''
|
|
||||||
response['url'] = req.url
|
response['url'] = req.url
|
||||||
|
|
||||||
|
if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403]\
|
||||||
|
and not opt.get('CF', False) and 'Please turn JavaScript on and reload the page' in req.content:
|
||||||
|
logger.debug("CF retry... for domain: %s" % domain)
|
||||||
|
from lib import proxytranslate
|
||||||
|
gResp = proxytranslate.process_request_proxy(url)
|
||||||
|
if gResp:
|
||||||
|
req = gResp['result']
|
||||||
|
response_code = req.status_code
|
||||||
|
response['url'] = gResp['url']
|
||||||
|
response['data'] = gResp['data']
|
||||||
|
else:
|
||||||
|
response['data'] = req.content if req.content else ''
|
||||||
|
|
||||||
if type(response['data']) != str:
|
if type(response['data']) != str:
|
||||||
try: response['data'] = response['data'].decode('utf-8')
|
try:
|
||||||
except: response['data'] = response['data'].decode('ISO-8859-1')
|
response['data'] = response['data'].decode('utf-8')
|
||||||
|
except:
|
||||||
|
response['data'] = response['data'].decode('ISO-8859-1')
|
||||||
|
|
||||||
if not response['data']:
|
if not response['data']:
|
||||||
response['data'] = ''
|
response['data'] = ''
|
||||||
|
|
||||||
if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403]\
|
|
||||||
and not opt.get('CF', False) and 'Please turn JavaScript on and reload the page' in response['data']:
|
|
||||||
# if domain not in CF_LIST:
|
|
||||||
opt["CF"] = True
|
|
||||||
# with open(CF_LIST_PATH, "a") as CF_File:
|
|
||||||
# CF_File.write("%s\n" % domain)
|
|
||||||
logger.debug("CF retry... for domain: %s" % domain)
|
|
||||||
return downloadpage(url, **opt)
|
|
||||||
|
|
||||||
if CF:
|
|
||||||
import re
|
|
||||||
response['data'] = re.sub('["|\']/save/[^"]*(https?://[^"]+)', '"\\1', response['data'])
|
|
||||||
response['url'] = response['url'].replace('https://web.archive.org/save/', '')
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response['json'] = to_utf8(req.json())
|
response['json'] = to_utf8(req.json())
|
||||||
except:
|
except:
|
||||||
@@ -439,14 +425,6 @@ def downloadpage(url, **opt):
|
|||||||
if opt.get('cookies', True):
|
if opt.get('cookies', True):
|
||||||
save_cookies(alfa_s=opt.get('alfa_s', False))
|
save_cookies(alfa_s=opt.get('alfa_s', False))
|
||||||
|
|
||||||
# is_channel = inspect.getmodule(inspect.currentframe().f_back)
|
|
||||||
# is_channel = scrapertools.find_single_match(str(is_channel), "<module '(channels).*?'")
|
|
||||||
# if is_channel and isinstance(response_code, int):
|
|
||||||
# if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
|
|
||||||
# if response_code > 399:
|
|
||||||
# show_infobox(info_dict)
|
|
||||||
# raise WebErrorException(urlparse.urlparse(url)[1])
|
|
||||||
|
|
||||||
if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
|
if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
|
||||||
show_infobox(info_dict)
|
show_infobox(info_dict)
|
||||||
if not config.get_setting("debug"): logger.info('Page URL:',url)
|
if not config.get_setting("debug"): logger.info('Page URL:',url)
|
||||||
|
|||||||
@@ -0,0 +1,81 @@
|
|||||||
|
# based on https://github.com/MrCl0wnLab/ProxyGoogleTranslate
|
||||||
|
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from platformcode import logger
|
||||||
|
|
||||||
|
try:
|
||||||
|
from urllib import request
|
||||||
|
except:
|
||||||
|
import urllib as request
|
||||||
|
HEADERS = {
|
||||||
|
'Host': 'translate.google.com',
|
||||||
|
'User-Agent': 'android'
|
||||||
|
}
|
||||||
|
|
||||||
|
MAX_CONECTION_THREAD = 10
|
||||||
|
|
||||||
|
BASE_URL_PROXY = 'https://translate.googleusercontent.com'
|
||||||
|
BASE_URL_TRANSLATE = 'https://translate.google.com/translate?hl=it&sl=en&tl=it&u=[TARGET_URL]&sandbox=0' # noqa: E501
|
||||||
|
|
||||||
|
|
||||||
|
def checker_url(html, url):
    """Return the first URL found in ``html`` that contains ``url``.

    ``html`` is scanned with a permissive URL regex.  Every candidate and the
    wanted ``url`` fragment are compared with ``&amp;`` decoded to ``&``, so
    the lookup works whether the page (or the caller) spells query-string
    separators escaped or not.  The returned URL is always in decoded form.

    :param html: markup to search.
    :param url: substring the wanted URL must contain.
    :return: the first matching URL, or ``None`` when ``html`` is empty,
        ``url`` is ``None`` or nothing matches.
    """
    if not html or url is None:
        return None

    wanted = url.replace('&amp;', '&')
    grep_regex = re.findall(r'href="|src="|value="|((?:http[s]://|ftp[s]://)+\.*([-a-zA-Z0-9\.]+)([-a-zA-Z0-9\.]){1,}([-a-zA-Z0-9_\.\#\@\:%_/\?\=\~\&\-\//\!\'\;\(\)\s\^\:blank:\:punct:\:xdigit:\:space:\$]+))', html)  # noqa: E501
    for url_result_regex in grep_regex:
        # findall yields one tuple of groups per match; group 0 holds the
        # whole URL and is empty for the bare href="/src="/value=" branches.
        candidate = url_result_regex[0].replace('&amp;', '&')
        if wanted in candidate:
            return candidate
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def process_request_proxy(url, max_attempts=20):
    """Download ``url`` through the Google Translate web proxy.

    The function requests the translate front-end for ``url``, follows the
    two intermediate ``translate_p`` / ``translate_c`` links located with
    ``checker_url`` and then polls the final proxied page until it no longer
    contains the ``'Sto traducendo'`` marker (presumably the Italian
    "translating" placeholder -- confirm against a live response).

    :param url: address of the page to fetch.
    :param max_attempts: maximum number of polls of the proxied page, spaced
        0.5 s apart, before giving up.
    :return: ``{'url': ..., 'result': <requests response>, 'data': <page
        text>}`` on success; ``None`` when ``url`` is empty, an intermediate
        link cannot be found, the page never becomes ready or any exception
        is raised (the exception is logged, not propagated).
    """
    if not url:
        return None

    try:
        target_url = \
            BASE_URL_TRANSLATE.replace('[TARGET_URL]', request.quote(url))
        logger.debug(target_url)

        return_html = requests.get(target_url, timeout=20, headers=HEADERS)
        # A requests response is falsy for 4xx/5xx status codes.
        if not return_html:
            return None

        # NOTE(review): checker_url decides how this prefix is matched; if it
        # compares literally and the page escapes '&' as '&amp;', this prefix
        # will not be found -- confirm against a live response.
        url_request = checker_url(
            return_html.text,
            BASE_URL_PROXY + '/translate_p?hl=it&sl=en&tl=it&u='
        )
        logger.debug(url_request)
        if not url_request:
            # No intermediate link found: nothing sensible to request.
            return None

        request_final = requests.get(
            url_request,
            timeout=20,
            headers={'User-Agent': 'android'}
        )

        url_request_proxy = checker_url(
            request_final.text, BASE_URL_PROXY + '/translate_c?depth=1')
        logger.debug(url_request_proxy)
        if not url_request_proxy:
            return None

        # Poll the proxied page a bounded number of times.  Looping while the
        # response is falsy would never end on a persistent HTTP error.
        result = None
        for attempt in range(max_attempts):
            time.sleep(0.5)
            candidate = requests.get(
                url_request_proxy,
                timeout=20,
                headers={'User-Agent': 'android'}
            )
            # .content is bytes, so the marker must be a bytes literal too
            # (a str marker raises TypeError on Python 3).
            if candidate and b'Sto traducendo' not in candidate.content:
                result = candidate
                break
            logger.debug('proxytranslate: page not ready, attempt %d of %d'
                         % (attempt + 1, max_attempts))

        if result is None:
            logger.error('proxytranslate: no usable response for %s' % url)
            return None

        data = result.content.decode('utf-8', 'ignore')
        if not isinstance(data, str):
            # Python 2: decode() returned unicode; keep the historical
            # byte-string type.  On Python 3 data is already str and must
            # stay str for the str regexes below.
            data = data.encode('utf-8')

        # Quote unquoted attribute values.
        data = re.sub(r'\s(\w+)=(?!")([^<>\s]+)', r' \1="\2"', data)
        # Decode escaped ampersands before rewriting links so that the
        # '&usg=' delimiter below also matches entity-escaped URLs.
        data = data.replace('&amp;', '&')
        # Replace proxied links with the address carried in their 'u' field.
        data = re.sub(r'https://translate\.googleusercontent\.com/.*?u=(.*?)&usg=[A-Za-z0-9_-]+',
                      r'\1', data)

        return {'url': url.strip(), 'result': result, 'data': data}
    except Exception as e:
        # Best effort: callers treat a None result as "proxy unavailable".
        logger.error(e)
        return None
|
||||||
Reference in New Issue
Block a user