New anti-CF method -> fix ilgenio

marco
2020-12-08 17:50:24 +01:00
parent 702d89ff51
commit 0d8af39a0c
4 changed files with 114 additions and 64 deletions


@@ -28,7 +28,7 @@
"guardaserieclick": "https://www.guardaserie.date",
"guardaserieicu": "https://guardaserie.rocks",
"hd4me": "https://hd4me.net",
"ilgeniodellostreaming": "https://ilgeniodellostreaming.pet",
"ilgeniodellostreaming": "https://ilgeniodellostreaming.cat",
"ilgeniodellostreaming_cam": "https://ilgeniodellostreaming.gold",
"italiaserie": "https://italiaserie.best",
"mondoserietv": "https://mondoserietv.fun",


@@ -47,7 +47,7 @@ def mainlist(item):
@support.scrape
def peliculas(item):
info()
# debug = True
# debugBlock = True
if item.args == 'search':
patronBlock = r'<div class="search-page">(?P<block>.*?)<footer class="main">'
@@ -104,7 +104,7 @@ def episodios(item):
patron = r'<a href="(?P<url>[^"]+)"><img src="(?P<thumb>[^"]+)">.*?'\
'<div class="numerando">(?P<episode>[^<]+).*?<div class="episodiotitle">'\
'[^>]+>(?P<title>[^<]+)<\/a>'
# debug = True
# debug = True
return locals()
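
For reference, a standalone sketch of how the named groups in this patron are meant to line up with the page markup. The HTML fragment below is invented for illustration, and re.DOTALL matching is assumed (the real extraction is handled by support.scrape):

import re

# Hypothetical episode entry in the shape the patron above expects.
html = ('<a href="https://example.com/stagione-1-episodio-1/"><img src="https://example.com/thumb.jpg">'
        '<div class="numerando">1 - 1</div><div class="episodiotitle">'
        '<a href="#">Episodio 1</a>')

patron = (r'<a href="(?P<url>[^"]+)"><img src="(?P<thumb>[^"]+)">.*?'
          r'<div class="numerando">(?P<episode>[^<]+).*?<div class="episodiotitle">'
          r'[^>]+>(?P<title>[^<]+)</a>')

for match in re.finditer(patron, html, re.DOTALL):
    print(match.group('episode'), match.group('title'), match.group('url'))
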
@support.scrape
@@ -128,25 +128,16 @@ def search(item, text):
info(text)
itemlist = []
text = text.replace(' ', '+')
item.url = host + "/wp-json/wp/v2/search?per_page=100&search=" + text
results = support.httptools.downloadpage(item.url).json
for r in results:
title = r['title']
longtitle = support.typo(title, 'bold')
if '[sub-ita]' in title.lower():
longtitle += support.typo('Sub-ITA', '_ [] color kod')
title = title.split('[')[0]
itemlist.append(item.clone(action='findvideos' if r['subtype'] == 'movies' else 'episodios',
title=longtitle,
fulltitle=title,
show=title,
contentTitle=title,
contentSerieName=title,
contentType='movie' if r['subtype'] == 'movies' else 'tvshow',
url=r['url']))
# support.dbg()
support.tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
return itemlist
item.url = host + "?s=" + text
# try:
item.args = 'search'
return peliculas(item)
# except:
# import sys
# for line in sys.exc_info():
# info("%s" % line)
return []
def newest(categoria):
info(categoria)
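
The rewritten search() no longer queries the wp-json endpoint: it builds the site's native ?s= query and hands the item to peliculas() with args='search', so the search-page patronBlock above does the scraping. A minimal standalone sketch of that round trip, using plain requests and a hard-coded host purely for illustration:

import re
import requests

HOST = 'https://ilgeniodellostreaming.cat'  # assumed current host from the channel list

def simple_search(text):
    # Same normalisation as the channel code: spaces become '+'.
    url = HOST + '?s=' + text.replace(' ', '+')
    page = requests.get(url, timeout=20).text
    # Same block boundary peliculas() uses when item.args == 'search'.
    block = re.search(r'<div class="search-page">(?P<block>.*?)<footer class="main">', page, re.DOTALL)
    return block.group('block') if block else ''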


@@ -58,7 +58,6 @@ HTTPTOOLS_DEFAULT_RANDOM_HEADERS = False
# with open(CF_LIST_PATH, "rb") as CF_File:
# CF_LIST = CF_File.read().splitlines()
FORCE_CLOUDSCRAPER_LIST = ['akvideo.stream']
def get_user_agent():
# Returns the global user agent to be used when necessary for the url.
@@ -269,25 +268,12 @@ def downloadpage(url, **opt):
"""
url = scrapertools.unescape(url)
domain = urlparse.urlparse(url).netloc
# global CF_LIST
CF = False
if domain in FORCE_CLOUDSCRAPER_LIST or opt.get('cf', False):
from lib import cloudscraper
session = cloudscraper.create_scraper()
CF = True
else:
from lib import requests
session = requests.session()
# if domain in CF_LIST or opt.get('CF', False):
if opt.get('CF', False):
url = 'https://web.archive.org/save/' + url
CF = True
from lib import requests
session = requests.session()
if config.get_setting('resolver_dns') and not opt.get('use_requests', False):
from core import resolverdns
session.mount('https://', resolverdns.CipherSuiteAdapter(domain, CF))
session.mount('https://', resolverdns.CipherSuiteAdapter(domain))
req_headers = default_headers.copy()
@@ -401,30 +387,30 @@ def downloadpage(url, **opt):
return type('HTTPResponse', (), response)
response_code = req.status_code
response['data'] = req.content if req.content else ''
response['url'] = req.url
if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403]\
and not opt.get('CF', False) and 'Please turn JavaScript on and reload the page' in req.content:
logger.debug("CF retry... for domain: %s" % domain)
from lib import proxytranslate
gResp = proxytranslate.process_request_proxy(url)
if gResp:
req = gResp['result']
response_code = req.status_code
response['url'] = gResp['url']
response['data'] = gResp['data']
else:
response['data'] = req.content if req.content else ''
if type(response['data']) != str:
try: response['data'] = response['data'].decode('utf-8')
except: response['data'] = response['data'].decode('ISO-8859-1')
try:
response['data'] = response['data'].decode('utf-8')
except:
response['data'] = response['data'].decode('ISO-8859-1')
if not response['data']:
response['data'] = ''
if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403]\
and not opt.get('CF', False) and 'Please turn JavaScript on and reload the page' in response['data']:
# if domain not in CF_LIST:
opt["CF"] = True
# with open(CF_LIST_PATH, "a") as CF_File:
# CF_File.write("%s\n" % domain)
logger.debug("CF retry... for domain: %s" % domain)
return downloadpage(url, **opt)
if CF:
import re
response['data'] = re.sub('["|\']/save/[^"]*(https?://[^"]+)', '"\\1', response['data'])
response['url'] = response['url'].replace('https://web.archive.org/save/', '')
try:
response['json'] = to_utf8(req.json())
except:
@@ -439,14 +425,6 @@ def downloadpage(url, **opt):
if opt.get('cookies', True):
save_cookies(alfa_s=opt.get('alfa_s', False))
# is_channel = inspect.getmodule(inspect.currentframe().f_back)
# is_channel = scrapertools.find_single_match(str(is_channel), "<module '(channels).*?'")
# if is_channel and isinstance(response_code, int):
# if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
# if response_code > 399:
# show_infobox(info_dict)
# raise WebErrorException(urlparse.urlparse(url)[1])
if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
show_infobox(info_dict)
if not config.get_setting("debug"): logger.info('Page URL:',url)
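
The new anti-CF flow in downloadpage() appears to be two-staged: the Cloudflare challenge first triggers a retry through the Google Translate proxy (proxytranslate, added below), and if the challenge text is still present the whole request is re-sent through the Wayback Machine's on-demand capture URL, with the /save/ link rewriting stripped back out afterwards. A minimal standalone sketch of the archive.org stage, using plain requests rather than the addon's session handling (error handling omitted):

import re
import requests

def fetch_via_archive(url):
    # Let web.archive.org fetch the page on our behalf, then undo the link
    # rewriting it applies, mirroring the regex used in downloadpage() above.
    resp = requests.get('https://web.archive.org/save/' + url, timeout=30)
    data = re.sub('["|\']/save/[^"]*(https?://[^"]+)', '"\\1', resp.text)
    final_url = resp.url.replace('https://web.archive.org/save/', '')
    return final_url, data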

lib/proxytranslate.py Normal file

@@ -0,0 +1,81 @@
# based on https://github.com/MrCl0wnLab/ProxyGoogleTranslate
import re
import time

import requests

from platformcode import logger

try:
    from urllib import request
except:
    import urllib as request

HEADERS = {
    'Host': 'translate.google.com',
    'User-Agent': 'android'
}

MAX_CONECTION_THREAD = 10
BASE_URL_PROXY = 'https://translate.googleusercontent.com'
BASE_URL_TRANSLATE = 'https://translate.google.com/translate?hl=it&sl=en&tl=it&u=[TARGET_URL]&sandbox=0'  # noqa: E501


def checker_url(html, url):
    grep_regex = re.findall(r'href="|src="|value="|((?:http[s]://|ftp[s]://)+\.*([-a-zA-Z0-9\.]+)([-a-zA-Z0-9\.]){1,}([-a-zA-Z0-9_\.\#\@\:%_/\?\=\~\&\-\//\!\'\;\(\)\s\^\:blank:\:punct:\:xdigit:\:space:\$]+))', html)  # noqa: E501
    for url_result_regex in grep_regex:
        if url in url_result_regex[0]:
            return url_result_regex[0].replace('&amp;', '&')


def process_request_proxy(url):
    if not url:
        return
    try:
        target_url = \
            BASE_URL_TRANSLATE.replace('[TARGET_URL]', request.quote(url))
        logger.debug(target_url)
        return_html = requests.get(target_url, timeout=20, headers=HEADERS)
        if not return_html:
            return
        url_request = checker_url(
            return_html.text,
            BASE_URL_PROXY + '/translate_p?hl=it&sl=en&tl=it&u='
        )
        logger.debug(url_request)
        request_final = requests.get(
            url_request,
            timeout=20,
            headers={'User-Agent': 'android'}
        )
        url_request_proxy = checker_url(
            request_final.text, BASE_URL_PROXY + '/translate_c?depth=1')
        logger.debug(url_request_proxy)
        result = None
        while not result or 'Sto traducendo' in result.content:
            time.sleep(0.5)
            result = requests.get(
                url_request_proxy,
                timeout=20,
                headers={'User-Agent': 'android'}
            )
        logger.debug()
        data = result.content.decode('utf-8', 'ignore').encode('utf-8')
        data = re.sub('\s(\w+)=(?!")([^<>\s]+)', r' \1="\2"', data)
        data = re.sub('https://translate\.googleusercontent\.com/.*?u=(.*?)&amp;usg=[A-Za-z0-9_-]+',
                      '\\1', data)
        return {'url': url.strip(), 'result': result, 'data': data.replace('&amp;', '&')}
    except Exception as e:
        logger.error(e)
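
For context, a short usage sketch of this module, mirroring how downloadpage() above falls back to it; the target URL is invented and the addon's lib package is assumed to be importable:

from lib import proxytranslate

# Hypothetical Cloudflare-protected page.
gResp = proxytranslate.process_request_proxy('https://example.com/protected-page')
if gResp:
    # 'url' is the original target, 'result' the final requests.Response,
    # 'data' the HTML with the translate-proxy wrappers stripped out.
    print(gResp['url'])
    print(gResp['data'][:200])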