migliorati test per i server e alcuni fix

2020-07-18 17:38:27 +02:00
parent c41e4b9a3c
commit 7844ddaba7
9 changed files with 60 additions and 49 deletions
@@ -56,7 +56,7 @@ HTTPTOOLS_DEFAULT_RANDOM_HEADERS = False
 #     with open(CF_LIST_PATH, "rb") as CF_File:
 #         CF_LIST = CF_File.read().splitlines()
-FORCE_CLOUDSCRAPER_LIST = []
+FORCE_CLOUDSCRAPER_LIST = ['akvideo.stream']
 def get_user_agent():
    # Returns the global user agent to be used when necessary for the url.
@@ -256,7 +256,7 @@ def downloadpage(url, **opt):
                Parameter Type Description
                -------------------------------------------------- -------------------------------------------------- ------------
-                HTTPResponse.sucess: bool True: Request successful | False: Error when making the request
+                HTTPResponse.success: bool True: Request successful | False: Error when making the request
                HTTPResponse.code: int Server response code or error code if an error occurs
                HTTPResponse.error: str Description of the error in case of an error
                HTTPResponse.headers: dict Dictionary with server response headers
@@ -380,9 +380,10 @@ def downloadpage(url, **opt):
            req = requests.Response()
            if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
                response['data'] = ''
-                response['sucess'] = False
+                response['success'] = False
                info_dict.append(('Success', 'False'))
-                response['code'] = str(e)
+                import traceback
                response['code'] = traceback.format_exc()
                info_dict.append(('Response code', str(e)))
                info_dict.append(('Finished in', time.time() - inicio))
                if not opt.get('alfa_s', False):
@@ -393,7 +394,7 @@ def downloadpage(url, **opt):
    else:
        response['data'] = ''
-        response['sucess'] = False
+        response['success'] = False
        response['code'] = ''
        return type('HTTPResponse', (), response)
@@ -476,10 +477,10 @@ def fill_fields_post(info_dict, req, response, req_headers, inicio):
        if response['code'] == 200:
            info_dict.append(('Success', 'True'))
-            response['sucess'] = True
+            response['success'] = True
        else:
            info_dict.append(('Success', 'False'))
-            response['sucess'] = False
+            response['success'] = False
        info_dict.append(('Response data length', len(response['data'])))
@@ -1336,9 +1336,9 @@ def addQualityTag(item, itemlist, data, patron):
        else:
            log('nessun tag qualità trovato')
-def get_jwplayer_mediaurl(data, srvName):
+def get_jwplayer_mediaurl(data, srvName, onlyHttp=False):
    video_urls = []
-    block = scrapertools.find_single_match(data, r'sources: \[([^\]]+)\]')
+    block = scrapertools.find_single_match(data, r'sources:\s*\[([^\]]+)\]')
    if 'file:' in block:
        sources = scrapertools.find_multiple_matches(block, r'file:\s*"([^"]+)"(?:,label:\s*"([^"]+)")?')
    elif 'src:' in block:
@@ -1348,7 +1348,7 @@ def get_jwplayer_mediaurl(data, srvName):
    for url, quality in sources:
        quality = 'auto' if not quality else quality
        if url.split('.')[-1] != 'mpd':
-            video_urls.append(['.' + url.split('.')[-1] + ' [' + quality + '] [' + srvName + ']', url])
+            video_urls.append(['.' + url.split('.')[-1] + ' [' + quality + '] [' + srvName + ']', url if not onlyHttp else url.replace('https://', 'http://')])
    video_urls.sort(key=lambda x: x[0].split()[1])
    return video_urls
@@ -1,9 +1,8 @@
 # -*- coding: utf-8 -*-
 # by DrZ3r0
 import urllib
-from core import httptools
+from core import httptools, support
 from core import scrapertools
 from platformcode import logger, config
@@ -54,28 +53,8 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
        from lib import jsunpack
        data = jsunpack.unpack(data_pack)
    block = scrapertools.find_single_match(data, "sources:\s\[([^\]]+)\]")
    data = block if block else data
    # URL
    # logger.info(data)
    if vres:
        matches = scrapertools.find_multiple_matches(data, '''src:\s*["']?(http.*?\.mp4)''')
    else:
        matches = scrapertools.find_multiple_matches(data, '''src:\s*["']?(http.*?\.mp4)(?:[^,]+,[^,]+,res:([^,]+))?''')
        if matches:
            if len(matches[0])==2:
                i=0
                for m in matches:
                    vres.append("%sx" % m[1])
                    matches[i]=m[0]
                    i+=1
    _headers = urllib.urlencode(httptools.default_headers)
    video_urls = support.get_jwplayer_mediaurl(data, 'akvideo', onlyHttp=True)
    i = 0
    for media_url in matches:
        # URL del vídeo
        video_urls.append([vres[i] if i<len(vres) else "" + " mp4 [Akvideo] ", media_url.replace('https://', 'http://') + '|' + _headers])
        i = i + 1
    return sorted(video_urls, key=lambda x: int(x[0].split('x')[0])) if vres else video_urls
@@ -12,8 +12,8 @@ def test_video_exists(page_url):
    html = httptools.downloadpage(page_url)
    global data
    data = html.data
-    if html.code == 404:
+    if html.code == 404 or 'No Signal 404 Error Page' in data:
-        return False, config.get_localized_string(70292) % "CloudVideo"
+        return False, config.get_localized_string(70449) % "CloudVideo"
    return True, ""
@@ -3,6 +3,9 @@
 from platformcode import logger, config
 def test_video_exists(page_url):
    """Report the video as available without performing any check.

    page_url is accepted but not inspected and no request is made.
    Returns the pair (True, ""): the video is treated as existing and the
    message string is empty.
    """
    return True, ""
 # Returns an array of possible video URLs from the page_url
 def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    logger.info("(page_url='%s')" % page_url)
@@ -3,7 +3,7 @@
 # Conector Mixdrop By Alfa development Group
 # --------------------------------------------------------
-from core import httptools
+from core import httptools, servertools
 from core import scrapertools
 from lib import jsunpack
 from platformcode import logger, config
@@ -13,6 +13,12 @@ def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    global data
    data = httptools.downloadpage(page_url).data
    if 'window.location' in data:
        domain = 'https://' + servertools.get_server_host('mixdrop')[0]
        url = domain + scrapertools.find_single_match(data, "window\.location\s*=\s*[\"']([^\"']+)")
        data = httptools.downloadpage(url).data
    if "<h2>WE ARE SORRY</h2>" in data or '<title>404 Not Found</title>' in data:
        return False, config.get_localized_string(70449) % "MixDrop"
    return True, ""
@@ -23,6 +29,7 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
    video_urls = []
    ext = '.mp4'
    global data
    packed = scrapertools.find_single_match(data, r'(eval.*?)</script>')
    unpacked = jsunpack.unpack(packed)
@@ -17,6 +17,9 @@ monitor = filetools.join(config.get_data_path(), 'elementum_monitor.json')
 extensions_list = ['.aaf', '.3gp', '.asf', '.avi', '.flv', '.mpeg', '.m1v', '.m2v', '.m4v', '.mkv', '.mov', '.mpg', '.mpe', '.mp4', '.ogg', '.wmv']
 def test_video_exists(page_url):
    """Report the video as available without performing any check.

    page_url is accepted but not inspected and no request is made.
    Returns the pair (True, ""): the video is treated as existing and the
    message string is empty.
    """
    return True, ""
 # Returns an array of possible video URLs from the page_url
 def get_video_url(page_url, premium=False, user='', password='', video_password=''):
@@ -92,7 +92,7 @@ def test_video_exists(page_url):
    data = httptools.downloadpage(page_url).data
-    if "File was deleted" in data:
+    if "File was deleted" in data or "Video non disponibile" in data:
        return False, config.get_localized_string(70449) % "Youtube"
    return True, ""
@@ -107,7 +107,7 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
    video_id = scrapertools.find_single_match(page_url, '(?:v=|embed/)([A-z0-9_-]{11})')
    video_urls = extract_videos(video_id)
-    return video_urls
+    return sorted(video_urls, reverse=True)
 def remove_additional_ending_delimiter(data):
@@ -215,8 +215,8 @@ def extract_videos(video_id):
                        url = re.search('url=(.*)', opt["cipher"]).group(1)
                        s = cipher.get('s')
                        url = "%s&sig=%s" % (urllib.unquote(url), signature([s]))
-                        video_urls.append(["%s" % itag_list.get(opt["itag"], "audio"), url])
+                        video_urls.append(["%s" % itag_list.get(opt["itag"], "video"), url])
                    elif opt["itag"] in itag_list:
                        video_urls.append(["%s" % itag_list.get(opt["itag"], "video"), opt["url"]])
-    return video_urls
+    return video_urls
@@ -4,7 +4,6 @@ import sys
 import unittest
 import parameterized
 from lib import requests
 from platformcode import config
 config.set_setting('tmdb_active', False)
@@ -13,6 +12,8 @@ librerias = os.path.join(config.get_runtime_path(), 'lib')
 sys.path.insert(0, librerias)
 from core.support import typo
 from core.item import Item
 from core.httptools import downloadpage
 from core import servertools
 import channelselector
 import re
@@ -106,7 +107,7 @@ chNumRis = {
 def getChannels():
-    channel_list = channelselector.filterchannels("all")[0:2]
+    channel_list = channelselector.filterchannels("all")
    ret = []
    for chItem in channel_list:
        ch = chItem.channel
@@ -141,7 +142,7 @@ class GenericChannelTest(unittest.TestCase):
        self.assertTrue(mainlist, 'channel ' + self.ch + ' has no menu')
        for it in mainlist:
-            # it.title = it.title.decode('ascii', 'ignore')
+            print 'testing ' + self.ch + ' -> ' + it.title
            if it.action == 'channel_config':
                hasChannelConfig = True
                continue
@@ -161,6 +162,7 @@ class GenericChannelTest(unittest.TestCase):
                self.assertLess(len(resIt.fulltitle), 110,
                                'channel ' + self.ch + ' -> ' + it.title + ' might contain wrong titles\n' + resIt.fulltitle)
                if resIt.url:
                    self.assertIsInstance(resIt.url, str, 'channel ' + self.ch + ' -> ' + it.title + ' -> ' + resIt.title + ' contain non-string url')
                    self.assertIsNotNone(re.match(validUrlRegex, resIt.url),
                                         'channel ' + self.ch + ' -> ' + it.title + ' -> ' + resIt.title + ' might contain wrong url\n' + resIt.url)
                if 'year' in resIt.infoLabels and resIt.infoLabels['year']:
@@ -177,17 +179,26 @@ class GenericChannelTest(unittest.TestCase):
            # some sites might have no link inside, but if all results are without servers, there's something wrong
            servers = []
            for resIt in itemlist:
-                servers = getattr(self.module, resIt.action)(resIt)
+                if hasattr(self.module, resIt.action):
                    servers = getattr(self.module, resIt.action)(resIt)
                else:
                    servers = [resIt]
                if servers:
                    break
            self.assertTrue(servers, 'channel ' + self.ch + ' -> ' + it.title + ' has no servers on all results')
            for server in servers:
-                srv = server.server
+                srv = server.server.lower()
                if not srv:
                    continue
                module = __import__('servers.%s' % srv, fromlist=["servers.%s" % srv])
                page_url = server.url
                print 'testing ' + page_url
                self.assert_(hasattr(module, 'test_video_exists'), srv + ' has no test_video_exists')
                if module.test_video_exists(page_url)[0]:
                    urls = module.get_video_url(page_url)
                    server_parameters = servertools.get_server_parameters(srv)
                    self.assertTrue(urls or server_parameters.get("premium"), srv + ' scraper did not return direct urls for ' + page_url)
                    print urls
                    for u in urls:
                        spl = u[1].split('|')
@@ -199,11 +210,17 @@ class GenericChannelTest(unittest.TestCase):
                        if headersUrl:
                            for name in headersUrl.split('&'):
                                h, v = name.split('=')
-                                headers[h] = v
+                                h = str(h)
                                headers[h] = str(v)
                            print headers
-                        contentType = requests.head(directUrl, headers=headers, timeout=15).headers['Content-Type']
+                        if 'magnet:?' in directUrl:  # checking magnet links is not supported
-                        self.assert_(contentType.startswith('video') or 'mpegurl' in contentType,
+                            continue
-                                     srv + ' scraper did not return valid url for link ' + page_url)
+                        page = downloadpage(directUrl, headers=headers, only_headers=True, use_requests=True)
                        self.assertTrue(page.success, srv + ' scraper returned an invalid link')
                        self.assertLess(page.code, 400, srv + ' scraper returned a ' + str(page.code) + ' link')
                        contentType = page.headers['Content-Type']
                        self.assert_(contentType.startswith('video') or 'mpegurl' in contentType or 'octet-stream' in contentType or 'dash+xml' in contentType,
                                     srv + ' scraper did not return valid url for link ' + page_url + '\nDirect url: ' + directUrl + '\nContent-Type: ' + contentType)
        self.assertTrue(hasChannelConfig, 'channel ' + self.ch + ' has no channel config')
@@ -215,5 +232,6 @@ class GenericChannelTest(unittest.TestCase):
                    self.assertTrue(itemlist, 'channel ' + self.ch + ' returned no news for category ' + cat)
                    break
 if __name__ == '__main__':
    unittest.main()