From 7844ddaba7e049549dc3e4dc0eee505e8d807101 Mon Sep 17 00:00:00 2001
From: marco
Date: Sat, 18 Jul 2020 17:38:27 +0200
Subject: [PATCH] improved tests for the servers and some fixes

---
 core/httptools.py     | 15 ++++++++-------
 core/support.py       |  6 +++---
 servers/akvideo.py    | 25 ++-----------------------
 servers/cloudvideo.py |  4 ++--
 servers/directo.py    |  3 +++
 servers/mixdrop.py    |  9 ++++++++-
 servers/torrent.py    |  3 +++
 servers/youtube.py    |  8 ++++----
 tests.py              | 36 +++++++++++++++++++++++++++---------
 9 files changed, 60 insertions(+), 49 deletions(-)

diff --git a/core/httptools.py b/core/httptools.py
index 3a8cb5bb..1ec21351 100755
--- a/core/httptools.py
+++ b/core/httptools.py
@@ -56,7 +56,7 @@ HTTPTOOLS_DEFAULT_RANDOM_HEADERS = False
 # with open(CF_LIST_PATH, "rb") as CF_File:
 #     CF_LIST = CF_File.read().splitlines()

-FORCE_CLOUDSCRAPER_LIST = []
+FORCE_CLOUDSCRAPER_LIST = ['akvideo.stream']


 def get_user_agent():
     # Returns the global user agent to be used when necessary for the url.
@@ -256,7 +256,7 @@ def downloadpage(url, **opt):
         Parameter                 Type       Description
         ------------------------------------------------------------------------------------------------------------
-        HTTPResponse.sucess:      bool       True: Request successful | False: Error when making the request
+        HTTPResponse.success:     bool       True: Request successful | False: Error when making the request
         HTTPResponse.code:        int        Server response code or error code if an error occurs
         HTTPResponse.error:       str        Description of the error in case of an error
         HTTPResponse.headers:     dict       Dictionary with server response headers
@@ -380,9 +380,10 @@ def downloadpage(url, **opt):
             req = requests.Response()
         if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
             response['data'] = ''
-            response['sucess'] = False
+            response['success'] = False
             info_dict.append(('Success', 'False'))
-            response['code'] = str(e)
+            import traceback
+            response['code'] = traceback.format_exc()
             info_dict.append(('Response code', str(e)))
             info_dict.append(('Finished in', time.time() - inicio))
             if not opt.get('alfa_s', False):
@@ -393,7 +394,7 @@ def downloadpage(url, **opt):

     else:
         response['data'] = ''
-        response['sucess'] = False
+        response['success'] = False
         response['code'] = ''

     return type('HTTPResponse', (), response)
@@ -476,10 +477,10 @@ def fill_fields_post(info_dict, req, response, req_headers, inicio):

     if response['code'] == 200:
         info_dict.append(('Success', 'True'))
-        response['sucess'] = True
+        response['success'] = True
     else:
         info_dict.append(('Success', 'False'))
-        response['sucess'] = False
+        response['success'] = False

     info_dict.append(('Response data length', len(response['data'])))

diff --git a/core/support.py b/core/support.py
index 546604b9..3801f9bc 100755
--- a/core/support.py
+++ b/core/support.py
@@ -1336,9 +1336,9 @@ def addQualityTag(item, itemlist, data, patron):
     else:
         log('nessun tag qualità trovato')

-def get_jwplayer_mediaurl(data, srvName):
+def get_jwplayer_mediaurl(data, srvName, onlyHttp=False):
     video_urls = []
-    block = scrapertools.find_single_match(data, r'sources: \[([^\]]+)\]')
+    block = scrapertools.find_single_match(data, r'sources:\s*\[([^\]]+)\]')
     if 'file:' in block:
         sources = scrapertools.find_multiple_matches(block, r'file:\s*"([^"]+)"(?:,label:\s*"([^"]+)")?')
     elif 'src:' in block:
@@ -1348,7 +1348,7 @@ def get_jwplayer_mediaurl(data, srvName):
     for url, quality in sources:
         quality = 'auto' if not quality else quality
         if url.split('.')[-1] != 'mpd':
-            video_urls.append(['.' + url.split('.')[-1] + ' [' + quality + '] [' + srvName + ']', url])
+            video_urls.append(['.' + url.split('.')[-1] + ' [' + quality + '] [' + srvName + ']', url if not onlyHttp else url.replace('https://', 'http://')])
     video_urls.sort(key=lambda x: x[0].split()[1])

     return video_urls
\ No newline at end of file
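Reviewer note, not part of the patch: a minimal sketch of how the reworked
helper behaves, assuming the jwplayer markup shape the regexes above target.
The sources block and the example.com URL are invented for illustration.

    from core import support

    # Invented jwplayer block, shaped so that r'sources:\s*\[([^\]]+)\]'
    # captures it and the 'file:'/'label:' pattern finds one pair.
    data = 'sources: [{file:"https://example.com/v.mp4",label:"720p"}]'

    # With onlyHttp=True the https:// scheme is rewritten to http://,
    # which is what the akvideo change below relies on. Prints:
    # [['.mp4 [720p] [akvideo]', 'http://example.com/v.mp4']]
    print support.get_jwplayer_mediaurl(data, 'akvideo', onlyHttp=True)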
diff --git a/servers/akvideo.py b/servers/akvideo.py
index 61660480..14ff3836 100644
--- a/servers/akvideo.py
+++ b/servers/akvideo.py
@@ -1,9 +1,8 @@
 # -*- coding: utf-8 -*-
-# by DrZ3r0

 import urllib

-from core import httptools
+from core import httptools, support
 from core import scrapertools
 from platformcode import logger, config

@@ -54,28 +53,8 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
         from lib import jsunpack
         data = jsunpack.unpack(data_pack)

-    block = scrapertools.find_single_match(data, "sources:\s\[([^\]]+)\]")
-    data = block if block else data
-    # URL
-    # logger.info(data)
-    if vres:
-        matches = scrapertools.find_multiple_matches(data, '''src:\s*["']?(http.*?\.mp4)''')
-    else:
-        matches = scrapertools.find_multiple_matches(data, '''src:\s*["']?(http.*?\.mp4)(?:[^,]+,[^,]+,res:([^,]+))?''')
-    if matches:
-        if len(matches[0])==2:
-            i=0
-            for m in matches:
-                vres.append("%sx" % m[1])
-                matches[i]=m[0]
-                i+=1
-    _headers = urllib.urlencode(httptools.default_headers)
+    video_urls = support.get_jwplayer_mediaurl(data, 'akvideo', onlyHttp=True)

-    i = 0
-    for media_url in matches:
-        # URL del vídeo
-        video_urls.append([vres[i] if i

[patch truncated in source: the remainder of this hunk, the diffs for
servers/cloudvideo.py and servers/directo.py, and the file header plus the
first hunk header of the servers/mixdrop.py diff are missing]

+    if "WE ARE SORRY" in data or '404 Not Found' in data:
         return False, config.get_localized_string(70449) % "MixDrop"
     return True, ""

@@ -23,6 +29,7 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
     video_urls = []
     ext = '.mp4'
+    global data
     packed = scrapertools.find_single_match(data, r'(eval.*?)</script>')
     unpacked = jsunpack.unpack(packed)

diff --git a/servers/torrent.py b/servers/torrent.py
index f7fc2bf5..82cc2349 100755
--- a/servers/torrent.py
+++ b/servers/torrent.py
@@ -17,6 +17,9 @@ monitor = filetools.join(config.get_data_path(), 'elementum_monitor.json')
 extensions_list = ['.aaf', '.3gp', '.asf', '.avi', '.flv', '.mpeg', '.m1v', '.m2v', '.m4v', '.mkv',
                    '.mov', '.mpg', '.mpe', '.mp4', '.ogg', '.wmv']

+def test_video_exists(page_url):
+    return True, ""
+

 # Returns an array of possible video url's from the page_url
 def get_video_url(page_url, premium=False, user='', password='', video_password=''):

diff --git a/servers/youtube.py b/servers/youtube.py
index 60d805b6..fac8367c 100644
--- a/servers/youtube.py
+++ b/servers/youtube.py
@@ -92,7 +92,7 @@ def test_video_exists(page_url):

     data = httptools.downloadpage(page_url).data

-    if "File was deleted" in data:
+    if "File was deleted" in data or "Video non disponibile" in data:
         return False, config.get_localized_string(70449) % "Youtube"

     return True, ""
@@ -107,7 +107,7 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
     video_id = scrapertools.find_single_match(page_url, '(?:v=|embed/)([A-z0-9_-]{11})')
     video_urls = extract_videos(video_id)

-    return video_urls
+    return sorted(video_urls, reverse=True)


 def remove_additional_ending_delimiter(data):
@@ -215,8 +215,8 @@ def extract_videos(video_id):
             url = re.search('url=(.*)', opt["cipher"]).group(1)
             s = cipher.get('s')
             url = "%s&sig=%s" % (urllib.unquote(url), signature([s]))
-            video_urls.append(["%s" % itag_list.get(opt["itag"], "audio"), url])
+            video_urls.append(["%s" % itag_list.get(opt["itag"], "video"), url])
         elif opt["itag"] in itag_list:
             video_urls.append(["%s" % itag_list.get(opt["itag"], "video"), opt["url"]])

-    return video_urls
+    return video_urls
\ No newline at end of file
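Reviewer note, not part of the patch: the torrent.py stub above exists because
the updated test suite below asserts that every server module exposes
test_video_exists with the (ok, error_message) contract before it calls
get_video_url. A sketch of that contract under those assumptions; the magnet
URI is a made-up placeholder.

    from servers import torrent

    # Magnet links cannot be probed with a plain HTTP request, so the stub
    # reports every link as alive and leaves validation to get_video_url.
    ok, message = torrent.test_video_exists('magnet:?xt=urn:btih:0000')
    assert ok is True and message == ''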
diff --git a/tests.py b/tests.py
index f6489f60..1681125b 100644
--- a/tests.py
+++ b/tests.py
@@ -4,7 +4,6 @@ import sys
 import unittest

 import parameterized
-from lib import requests
 from platformcode import config

 config.set_setting('tmdb_active', False)
@@ -13,6 +12,8 @@ librerias = os.path.join(config.get_runtime_path(), 'lib')
 sys.path.insert(0, librerias)

 from core.support import typo
 from core.item import Item
+from core.httptools import downloadpage
+from core import servertools
 import channelselector
 import re
@@ -106,7 +107,7 @@ chNumRis = {


 def getChannels():
-    channel_list = channelselector.filterchannels("all")[0:2]
+    channel_list = channelselector.filterchannels("all")
     ret = []
     for chItem in channel_list:
         ch = chItem.channel
@@ -141,7 +142,7 @@ class GenericChannelTest(unittest.TestCase):
         self.assertTrue(mainlist, 'channel ' + self.ch + ' has no menu')

         for it in mainlist:
-            # it.title = it.title.decode('ascii', 'ignore')
+            print 'testing ' + self.ch + ' -> ' + it.title
             if it.action == 'channel_config':
                 hasChannelConfig = True
                 continue
@@ -161,6 +162,7 @@ class GenericChannelTest(unittest.TestCase):
                     self.assertLess(len(resIt.fulltitle), 110,
                                     'channel ' + self.ch + ' -> ' + it.title + ' might contain wrong titles\n' + resIt.fulltitle)
                     if resIt.url:
+                        self.assertIsInstance(resIt.url, str, 'channel ' + self.ch + ' -> ' + it.title + ' -> ' + resIt.title + ' contain non-string url')
                         self.assertIsNotNone(re.match(validUrlRegex, resIt.url),
                                              'channel ' + self.ch + ' -> ' + it.title + ' -> ' + resIt.title + ' might contain wrong url\n' + resIt.url)
                     if 'year' in resIt.infoLabels and resIt.infoLabels['year']:
@@ -177,17 +179,26 @@ class GenericChannelTest(unittest.TestCase):
                 # some sites might have no link inside, but if all results are without servers, there's something wrong
                 servers = []
                 for resIt in itemlist:
-                    servers = getattr(self.module, resIt.action)(resIt)
+                    if hasattr(self.module, resIt.action):
+                        servers = getattr(self.module, resIt.action)(resIt)
+                    else:
+                        servers = [resIt]
+
                     if servers:
                         break
                 self.assertTrue(servers, 'channel ' + self.ch + ' -> ' + it.title + ' has no servers on all results')
                 for server in servers:
-                    srv = server.server
+                    srv = server.server.lower()
+                    if not srv:
+                        continue
                     module = __import__('servers.%s' % srv, fromlist=["servers.%s" % srv])
                     page_url = server.url
                     print 'testing ' + page_url
+                    self.assert_(hasattr(module, 'test_video_exists'), srv + ' has no test_video_exists')
                     if module.test_video_exists(page_url)[0]:
                         urls = module.get_video_url(page_url)
+                        server_parameters = servertools.get_server_parameters(srv)
+                        self.assertTrue(urls or server_parameters.get("premium"), srv + ' scraper did not return direct urls for ' + page_url)
                         print urls
                         for u in urls:
                             spl = u[1].split('|')
@@ -199,11 +210,17 @@ class GenericChannelTest(unittest.TestCase):
                             if headersUrl:
                                 for name in headersUrl.split('&'):
                                     h, v = name.split('=')
-                                    headers[h] = v
+                                    h = str(h)
+                                    headers[h] = str(v)
                             print headers
-                            contentType = requests.head(directUrl, headers=headers, timeout=15).headers['Content-Type']
-                            self.assert_(contentType.startswith('video') or 'mpegurl' in contentType,
-                                         srv + ' scraper did not return valid url for link ' + page_url)
+                            if 'magnet:?' in directUrl:  # check of magnet links not supported
+                                continue
+                            page = downloadpage(directUrl, headers=headers, only_headers=True, use_requests=True)
+                            self.assertTrue(page.success, srv + ' scraper returned an invalid link')
+                            self.assertLess(page.code, 400, srv + ' scraper returned a ' + str(page.code) + ' link')
+                            contentType = page.headers['Content-Type']
+                            self.assert_(contentType.startswith('video') or 'mpegurl' in contentType or 'octet-stream' in contentType or 'dash+xml' in contentType,
+                                         srv + ' scraper did not return valid url for link ' + page_url + '\nDirect url: ' + directUrl + '\nContent-Type: ' + contentType)

         self.assertTrue(hasChannelConfig, 'channel ' + self.ch + ' has no channel config')

@@ -215,5 +232,6 @@ class GenericChannelTest(unittest.TestCase):
                 self.assertTrue(itemlist, 'channel ' + self.ch + ' returned no news for category ' + cat)
                 break

+
 if __name__ == '__main__':
     unittest.main()
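Reviewer note, not part of the patch: a sketch of how the renamed
HTTPResponse.success field and the header-only probe added to tests.py fit
together. The URL is a placeholder; both keyword arguments appear verbatim in
the patch above.

    from core.httptools import downloadpage

    # Header-only probe, as in the new test: no response body is downloaded.
    page = downloadpage('https://example.com/video.mp4',
                        only_headers=True, use_requests=True)

    # 'success' (previously misspelled 'sucess') is False when the request
    # itself failed; on failure 'code' now carries a full traceback string
    # instead of the bare exception text.
    if page.success and page.code < 400:
        print page.headers.get('Content-Type')  # e.g. video/mp4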