[^<]+).*?'\
'[^>]+>(?P
[^<]+)<\/a>'
-# debug = True
+# debug = True
return locals()
@support.scrape
@@ -128,25 +128,16 @@ def search(item, text):
info(text)
itemlist = []
text = text.replace(' ', '+')
- item.url = host + "/wp-json/wp/v2/search?per_page=100&search=" + text
- results = support.httptools.downloadpage(item.url).json
- for r in results:
- title = r['title']
- longtitle = support.typo(title, 'bold')
- if '[sub-ita]' in title.lower():
- longtitle += support.typo('Sub-ITA', '_ [] color kod')
- title = title.split('[')[0]
- itemlist.append(item.clone(action='findvideos' if r['subtype'] == 'movies' else 'episodios',
- title=longtitle,
- fulltitle=title,
- show=title,
- contentTitle=title,
- contentSerieName=title,
- contentType='movie' if r['subtype'] == 'movies' else 'tvshow',
- url=r['url']))
- # support.dbg()
- support.tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
- return itemlist
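+ # run the query through the standard ?s= listing and let peliculas() scrape the results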
+ item.url = host + "?s=" + text
+ item.args = 'search'
+ return peliculas(item)
+
def newest(categoria):
info(categoria)
diff --git a/core/httptools.py b/core/httptools.py
index ae7de722..18ecd4d4 100755
--- a/core/httptools.py
+++ b/core/httptools.py
@@ -58,7 +58,6 @@ HTTPTOOLS_DEFAULT_RANDOM_HEADERS = False
# with open(CF_LIST_PATH, "rb") as CF_File:
# CF_LIST = CF_File.read().splitlines()
-FORCE_CLOUDSCRAPER_LIST = ['akvideo.stream']
def get_user_agent():
# Returns the global user agent to be used when necessary for the url.
@@ -269,25 +268,12 @@ def downloadpage(url, **opt):
"""
url = scrapertools.unescape(url)
domain = urlparse.urlparse(url).netloc
- # global CF_LIST
- CF = False
-
- if domain in FORCE_CLOUDSCRAPER_LIST or opt.get('cf', False):
- from lib import cloudscraper
- session = cloudscraper.create_scraper()
- CF = True
- else:
- from lib import requests
- session = requests.session()
-
- # if domain in CF_LIST or opt.get('CF', False):
- if opt.get('CF', False):
- url = 'https://web.archive.org/save/' + url
- CF = True
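+ # a plain requests session is enough here; Cloudflare-challenged pages are retried through the translate proxy in the response handling below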
+ from lib import requests
+ session = requests.session()
if config.get_setting('resolver_dns') and not opt.get('use_requests', False):
from core import resolverdns
- session.mount('https://', resolverdns.CipherSuiteAdapter(domain, CF))
+ session.mount('https://', resolverdns.CipherSuiteAdapter(domain))
req_headers = default_headers.copy()
@@ -401,30 +387,30 @@ def downloadpage(url, **opt):
return type('HTTPResponse', (), response)
response_code = req.status_code
- response['data'] = req.content if req.content else ''
response['url'] = req.url
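+ # if Cloudflare answered with a JS challenge, fetch the page once through the Google Translate proxy and use that copy instead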
+ if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403]\
+ and not opt.get('CF', False) and 'Please turn JavaScript on and reload the page' in req.text:
+ logger.debug("CF retry... for domain: %s" % domain)
+ from lib import proxytranslate
+ gResp = proxytranslate.process_request_proxy(url)
+ if gResp:
+ req = gResp['result']
+ response_code = req.status_code
+ response['url'] = gResp['url']
+ response['data'] = gResp['data']
+ else:
+ response['data'] = req.content if req.content else ''
+
if type(response['data']) != str:
- try: response['data'] = response['data'].decode('utf-8')
- except: response['data'] = response['data'].decode('ISO-8859-1')
+ try:
+ response['data'] = response['data'].decode('utf-8')
+ except:
+ response['data'] = response['data'].decode('ISO-8859-1')
if not response['data']:
response['data'] = ''
- if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403]\
- and not opt.get('CF', False) and 'Please turn JavaScript on and reload the page' in response['data']:
- # if domain not in CF_LIST:
- opt["CF"] = True
- # with open(CF_LIST_PATH, "a") as CF_File:
- # CF_File.write("%s\n" % domain)
- logger.debug("CF retry... for domain: %s" % domain)
- return downloadpage(url, **opt)
-
- if CF:
- import re
- response['data'] = re.sub('["|\']/save/[^"]*(https?://[^"]+)', '"\\1', response['data'])
- response['url'] = response['url'].replace('https://web.archive.org/save/', '')
-
try:
response['json'] = to_utf8(req.json())
except:
@@ -439,14 +425,6 @@ def downloadpage(url, **opt):
if opt.get('cookies', True):
save_cookies(alfa_s=opt.get('alfa_s', False))
- # is_channel = inspect.getmodule(inspect.currentframe().f_back)
- # is_channel = scrapertools.find_single_match(str(is_channel), "<module '(channels).*?'")
- # if is_channel and response_code > 399:
- # show_infobox(info_dict)
- # raise WebErrorException(urlparse.urlparse(url)[1])
-
if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
show_infobox(info_dict)
if not config.get_setting("debug"): logger.info('Page URL:',url)
diff --git a/lib/proxytranslate.py b/lib/proxytranslate.py
new file mode 100644
index 00000000..6b712ac0
--- /dev/null
+++ b/lib/proxytranslate.py
@@ -0,0 +1,81 @@
+# based on https://github.com/MrCl0wnLab/ProxyGoogleTranslate
+
+import re
+import time
+
+import requests
+from platformcode import logger
+
+try:
+ from urllib import request
+except:
+ import urllib as request
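+# headers sent to the translate.google.com front end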
+HEADERS = {
+ 'Host': 'translate.google.com',
+ 'User-Agent': 'android'
+}
+
+MAX_CONNECTION_THREAD = 10
+
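+# [TARGET_URL] is replaced with the url-encoded address of the page to fetch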
+BASE_URL_PROXY = 'https://translate.googleusercontent.com'
+BASE_URL_TRANSLATE = 'https://translate.google.com/translate?hl=it&sl=en&tl=it&u=[TARGET_URL]&sandbox=0' # noqa: E501
+
+
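+# return the first absolute url in the html that contains the given prefix, with '&amp;' unescaped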
+def checker_url(html, url):
+ grep_regex = re.findall(r'href="|src="|value="|((?:http[s]://|ftp[s]://)+\.*([-a-zA-Z0-9\.]+)([-a-zA-Z0-9\.]){1,}([-a-zA-Z0-9_\.\#\@\:%_/\?\=\~\&\-\//\!\'\;\(\)\s\^\:blank:\:punct:\:xdigit:\:space:\$]+))', html) # noqa: E501
+ for url_result_regex in grep_regex:
+ if url in url_result_regex[0]:
+ return url_result_regex[0].replace('&amp;', '&')
+
+
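+# fetch a page through translate.google.com: request the translate wrapper, follow the
+# intermediate translate_p link, poll the translate_c frame until rendering is done and
+# return the cleaned html together with the final response object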
+def process_request_proxy(url):
+ if not url:
+ return
+
+ try:
+ target_url = \
+ BASE_URL_TRANSLATE.replace('[TARGET_URL]', request.quote(url))
+
+ logger.debug(target_url)
+
+ return_html = requests.get(target_url, timeout=20, headers=HEADERS)
+
+ if not return_html:
+ return
+
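+ # the wrapper page links to an intermediate translate_p url that has to be requested next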
+ url_request = checker_url(
+ return_html.text,
+ BASE_URL_PROXY + '/translate_p?hl=it&sl=en&tl=it&u='
+ )
+
+ logger.debug(url_request)
+
+ request_final = requests.get(
+ url_request,
+ timeout=20,
+ headers={'User-Agent': 'android'}
+ )
+
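+ # the intermediate page in turn points to the translate_c frame that carries the actual content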
+ url_request_proxy = checker_url(
+ request_final.text, BASE_URL_PROXY + '/translate_c?depth=1')
+
+ logger.debug(url_request_proxy)
+
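+ # poll until Google has finished rendering ('Sto traducendo' = 'Translating...')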
+ result = None
+ while not result or 'Sto traducendo' in result.text:
+ time.sleep(0.5)
+ result = requests.get(
+ url_request_proxy,
+ timeout=20,
+ headers={'User-Agent': 'android'}
+ )
+ logger.debug()
+
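+ # re-quote bare attribute values and rewrite translate-proxied links back to their original urls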
+ data = result.content.decode('utf-8', 'ignore').encode('utf-8')
+ data = re.sub(r'\s(\w+)=(?!")([^<>\s]+)', r' \1="\2"', data)
+ data = re.sub(r'https://translate\.googleusercontent\.com/.*?u=(.*?)&usg=[A-Za-z0-9_-]+',
+ '\\1', data)
+
+ return {'url': url.strip(), 'result': result, 'data': data.replace('&amp;', '&')}
+ except Exception as e:
+ logger.error(e)