migliorati test per i server e alcuni fix

This commit is contained in:
marco
2020-07-18 17:38:27 +02:00
parent c41e4b9a3c
commit 7844ddaba7
9 changed files with 60 additions and 49 deletions
+8 -7
View File
@@ -56,7 +56,7 @@ HTTPTOOLS_DEFAULT_RANDOM_HEADERS = False
# with open(CF_LIST_PATH, "rb") as CF_File: # with open(CF_LIST_PATH, "rb") as CF_File:
# CF_LIST = CF_File.read().splitlines() # CF_LIST = CF_File.read().splitlines()
FORCE_CLOUDSCRAPER_LIST = [] FORCE_CLOUDSCRAPER_LIST = ['akvideo.stream']
def get_user_agent(): def get_user_agent():
# Returns the global user agent to be used when necessary for the url. # Returns the global user agent to be used when necessary for the url.
@@ -256,7 +256,7 @@ def downloadpage(url, **opt):
Parameter Type Description Parameter Type Description
-------------------------------------------------- -------------------------------------------------- ------------ -------------------------------------------------- -------------------------------------------------- ------------
HTTPResponse.sucess: bool True: Request successful | False: Error when making the request HTTPResponse.success: bool True: Request successful | False: Error when making the request
HTTPResponse.code: int Server response code or error code if an error occurs HTTPResponse.code: int Server response code or error code if an error occurs
HTTPResponse.error: str Description of the error in case of an error HTTPResponse.error: str Description of the error in case of an error
HTTPResponse.headers: dict Dictionary with server response headers HTTPResponse.headers: dict Dictionary with server response headers
@@ -380,9 +380,10 @@ def downloadpage(url, **opt):
req = requests.Response() req = requests.Response()
if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''): if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
response['data'] = '' response['data'] = ''
response['sucess'] = False response['success'] = False
info_dict.append(('Success', 'False')) info_dict.append(('Success', 'False'))
response['code'] = str(e) import traceback
response['code'] = traceback.format_exc()
info_dict.append(('Response code', str(e))) info_dict.append(('Response code', str(e)))
info_dict.append(('Finished in', time.time() - inicio)) info_dict.append(('Finished in', time.time() - inicio))
if not opt.get('alfa_s', False): if not opt.get('alfa_s', False):
@@ -393,7 +394,7 @@ def downloadpage(url, **opt):
else: else:
response['data'] = '' response['data'] = ''
response['sucess'] = False response['success'] = False
response['code'] = '' response['code'] = ''
return type('HTTPResponse', (), response) return type('HTTPResponse', (), response)
@@ -476,10 +477,10 @@ def fill_fields_post(info_dict, req, response, req_headers, inicio):
if response['code'] == 200: if response['code'] == 200:
info_dict.append(('Success', 'True')) info_dict.append(('Success', 'True'))
response['sucess'] = True response['success'] = True
else: else:
info_dict.append(('Success', 'False')) info_dict.append(('Success', 'False'))
response['sucess'] = False response['success'] = False
info_dict.append(('Response data length', len(response['data']))) info_dict.append(('Response data length', len(response['data'])))
+3 -3
View File
@@ -1336,9 +1336,9 @@ def addQualityTag(item, itemlist, data, patron):
else: else:
log('nessun tag qualità trovato') log('nessun tag qualità trovato')
def get_jwplayer_mediaurl(data, srvName): def get_jwplayer_mediaurl(data, srvName, onlyHttp=False):
video_urls = [] video_urls = []
block = scrapertools.find_single_match(data, r'sources: \[([^\]]+)\]') block = scrapertools.find_single_match(data, r'sources:\s*\[([^\]]+)\]')
if 'file:' in block: if 'file:' in block:
sources = scrapertools.find_multiple_matches(block, r'file:\s*"([^"]+)"(?:,label:\s*"([^"]+)")?') sources = scrapertools.find_multiple_matches(block, r'file:\s*"([^"]+)"(?:,label:\s*"([^"]+)")?')
elif 'src:' in block: elif 'src:' in block:
@@ -1348,7 +1348,7 @@ def get_jwplayer_mediaurl(data, srvName):
for url, quality in sources: for url, quality in sources:
quality = 'auto' if not quality else quality quality = 'auto' if not quality else quality
if url.split('.')[-1] != 'mpd': if url.split('.')[-1] != 'mpd':
video_urls.append(['.' + url.split('.')[-1] + ' [' + quality + '] [' + srvName + ']', url]) video_urls.append(['.' + url.split('.')[-1] + ' [' + quality + '] [' + srvName + ']', url if not onlyHttp else url.replace('https://', 'http://')])
video_urls.sort(key=lambda x: x[0].split()[1]) video_urls.sort(key=lambda x: x[0].split()[1])
return video_urls return video_urls
+2 -23
View File
@@ -1,9 +1,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# by DrZ3r0
import urllib import urllib
from core import httptools from core import httptools, support
from core import scrapertools from core import scrapertools
from platformcode import logger, config from platformcode import logger, config
@@ -54,28 +53,8 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
from lib import jsunpack from lib import jsunpack
data = jsunpack.unpack(data_pack) data = jsunpack.unpack(data_pack)
block = scrapertools.find_single_match(data, "sources:\s\[([^\]]+)\]")
data = block if block else data
# URL
# logger.info(data)
if vres:
matches = scrapertools.find_multiple_matches(data, '''src:\s*["']?(http.*?\.mp4)''')
else:
matches = scrapertools.find_multiple_matches(data, '''src:\s*["']?(http.*?\.mp4)(?:[^,]+,[^,]+,res:([^,]+))?''')
if matches:
if len(matches[0])==2:
i=0
for m in matches:
vres.append("%sx" % m[1])
matches[i]=m[0]
i+=1
_headers = urllib.urlencode(httptools.default_headers) _headers = urllib.urlencode(httptools.default_headers)
video_urls = support.get_jwplayer_mediaurl(data, 'akvideo', onlyHttp=True)
i = 0
for media_url in matches:
# URL del vídeo
video_urls.append([vres[i] if i<len(vres) else "" + " mp4 [Akvideo] ", media_url.replace('https://', 'http://') + '|' + _headers])
i = i + 1
return sorted(video_urls, key=lambda x: int(x[0].split('x')[0])) if vres else video_urls return sorted(video_urls, key=lambda x: int(x[0].split('x')[0])) if vres else video_urls
+2 -2
View File
@@ -12,8 +12,8 @@ def test_video_exists(page_url):
html = httptools.downloadpage(page_url) html = httptools.downloadpage(page_url)
global data global data
data = html.data data = html.data
if html.code == 404: if html.code == 404 or 'No Signal 404 Error Page' in data:
return False, config.get_localized_string(70292) % "CloudVideo" return False, config.get_localized_string(70449) % "CloudVideo"
return True, "" return True, ""
+3
View File
@@ -3,6 +3,9 @@
from platformcode import logger, config from platformcode import logger, config
def test_video_exists(page_url):
return True, ""
# Returns an array of possible video url's from the page_url # Returns an array of possible video url's from the page_url
def get_video_url(page_url, premium=False, user="", password="", video_password=""): def get_video_url(page_url, premium=False, user="", password="", video_password=""):
logger.info("(page_url='%s')" % page_url) logger.info("(page_url='%s')" % page_url)
+8 -1
View File
@@ -3,7 +3,7 @@
# Conector Mixdrop By Alfa development Group # Conector Mixdrop By Alfa development Group
# -------------------------------------------------------- # --------------------------------------------------------
from core import httptools from core import httptools, servertools
from core import scrapertools from core import scrapertools
from lib import jsunpack from lib import jsunpack
from platformcode import logger, config from platformcode import logger, config
@@ -13,6 +13,12 @@ def test_video_exists(page_url):
logger.info("(page_url='%s')" % page_url) logger.info("(page_url='%s')" % page_url)
global data global data
data = httptools.downloadpage(page_url).data data = httptools.downloadpage(page_url).data
if 'window.location' in data:
domain = 'https://' + servertools.get_server_host('mixdrop')[0]
url = domain + scrapertools.find_single_match(data, "window\.location\s*=\s*[\"']([^\"']+)")
data = httptools.downloadpage(url).data
if "<h2>WE ARE SORRY</h2>" in data or '<title>404 Not Found</title>' in data: if "<h2>WE ARE SORRY</h2>" in data or '<title>404 Not Found</title>' in data:
return False, config.get_localized_string(70449) % "MixDrop" return False, config.get_localized_string(70449) % "MixDrop"
return True, "" return True, ""
@@ -23,6 +29,7 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
video_urls = [] video_urls = []
ext = '.mp4' ext = '.mp4'
global data
packed = scrapertools.find_single_match(data, r'(eval.*?)</script>') packed = scrapertools.find_single_match(data, r'(eval.*?)</script>')
unpacked = jsunpack.unpack(packed) unpacked = jsunpack.unpack(packed)
+3
View File
@@ -17,6 +17,9 @@ monitor = filetools.join(config.get_data_path(), 'elementum_monitor.json')
extensions_list = ['.aaf', '.3gp', '.asf', '.avi', '.flv', '.mpeg', '.m1v', '.m2v', '.m4v', '.mkv', '.mov', '.mpg', '.mpe', '.mp4', '.ogg', '.wmv'] extensions_list = ['.aaf', '.3gp', '.asf', '.avi', '.flv', '.mpeg', '.m1v', '.m2v', '.m4v', '.mkv', '.mov', '.mpg', '.mpe', '.mp4', '.ogg', '.wmv']
def test_video_exists(page_url):
return True, ""
# Returns an array of possible video url's from the page_url # Returns an array of possible video url's from the page_url
def get_video_url(page_url, premium=False, user='', password='', video_password=''): def get_video_url(page_url, premium=False, user='', password='', video_password=''):
+4 -4
View File
@@ -92,7 +92,7 @@ def test_video_exists(page_url):
data = httptools.downloadpage(page_url).data data = httptools.downloadpage(page_url).data
if "File was deleted" in data: if "File was deleted" in data or "Video non disponibile" in data:
return False, config.get_localized_string(70449) % "Youtube" return False, config.get_localized_string(70449) % "Youtube"
return True, "" return True, ""
@@ -107,7 +107,7 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
video_id = scrapertools.find_single_match(page_url, '(?:v=|embed/)([A-z0-9_-]{11})') video_id = scrapertools.find_single_match(page_url, '(?:v=|embed/)([A-z0-9_-]{11})')
video_urls = extract_videos(video_id) video_urls = extract_videos(video_id)
return video_urls return sorted(video_urls, reverse=True)
def remove_additional_ending_delimiter(data): def remove_additional_ending_delimiter(data):
@@ -215,8 +215,8 @@ def extract_videos(video_id):
url = re.search('url=(.*)', opt["cipher"]).group(1) url = re.search('url=(.*)', opt["cipher"]).group(1)
s = cipher.get('s') s = cipher.get('s')
url = "%s&sig=%s" % (urllib.unquote(url), signature([s])) url = "%s&sig=%s" % (urllib.unquote(url), signature([s]))
video_urls.append(["%s" % itag_list.get(opt["itag"], "audio"), url]) video_urls.append(["%s" % itag_list.get(opt["itag"], "video"), url])
elif opt["itag"] in itag_list: elif opt["itag"] in itag_list:
video_urls.append(["%s" % itag_list.get(opt["itag"], "video"), opt["url"]]) video_urls.append(["%s" % itag_list.get(opt["itag"], "video"), opt["url"]])
return video_urls return video_urls
+27 -9
View File
@@ -4,7 +4,6 @@ import sys
import unittest import unittest
import parameterized import parameterized
from lib import requests
from platformcode import config from platformcode import config
config.set_setting('tmdb_active', False) config.set_setting('tmdb_active', False)
@@ -13,6 +12,8 @@ librerias = os.path.join(config.get_runtime_path(), 'lib')
sys.path.insert(0, librerias) sys.path.insert(0, librerias)
from core.support import typo from core.support import typo
from core.item import Item from core.item import Item
from core.httptools import downloadpage
from core import servertools
import channelselector import channelselector
import re import re
@@ -106,7 +107,7 @@ chNumRis = {
def getChannels(): def getChannels():
channel_list = channelselector.filterchannels("all")[0:2] channel_list = channelselector.filterchannels("all")
ret = [] ret = []
for chItem in channel_list: for chItem in channel_list:
ch = chItem.channel ch = chItem.channel
@@ -141,7 +142,7 @@ class GenericChannelTest(unittest.TestCase):
self.assertTrue(mainlist, 'channel ' + self.ch + ' has no menu') self.assertTrue(mainlist, 'channel ' + self.ch + ' has no menu')
for it in mainlist: for it in mainlist:
# it.title = it.title.decode('ascii', 'ignore') print 'testing ' + self.ch + ' -> ' + it.title
if it.action == 'channel_config': if it.action == 'channel_config':
hasChannelConfig = True hasChannelConfig = True
continue continue
@@ -161,6 +162,7 @@ class GenericChannelTest(unittest.TestCase):
self.assertLess(len(resIt.fulltitle), 110, self.assertLess(len(resIt.fulltitle), 110,
'channel ' + self.ch + ' -> ' + it.title + ' might contain wrong titles\n' + resIt.fulltitle) 'channel ' + self.ch + ' -> ' + it.title + ' might contain wrong titles\n' + resIt.fulltitle)
if resIt.url: if resIt.url:
self.assertIsInstance(resIt.url, str, 'channel ' + self.ch + ' -> ' + it.title + ' -> ' + resIt.title + ' contain non-string url')
self.assertIsNotNone(re.match(validUrlRegex, resIt.url), self.assertIsNotNone(re.match(validUrlRegex, resIt.url),
'channel ' + self.ch + ' -> ' + it.title + ' -> ' + resIt.title + ' might contain wrong url\n' + resIt.url) 'channel ' + self.ch + ' -> ' + it.title + ' -> ' + resIt.title + ' might contain wrong url\n' + resIt.url)
if 'year' in resIt.infoLabels and resIt.infoLabels['year']: if 'year' in resIt.infoLabels and resIt.infoLabels['year']:
@@ -177,17 +179,26 @@ class GenericChannelTest(unittest.TestCase):
# some sites might have no link inside, but if all results are without servers, there's something wrong # some sites might have no link inside, but if all results are without servers, there's something wrong
servers = [] servers = []
for resIt in itemlist: for resIt in itemlist:
servers = getattr(self.module, resIt.action)(resIt) if hasattr(self.module, resIt.action):
servers = getattr(self.module, resIt.action)(resIt)
else:
servers = [resIt]
if servers: if servers:
break break
self.assertTrue(servers, 'channel ' + self.ch + ' -> ' + it.title + ' has no servers on all results') self.assertTrue(servers, 'channel ' + self.ch + ' -> ' + it.title + ' has no servers on all results')
for server in servers: for server in servers:
srv = server.server srv = server.server.lower()
if not srv:
continue
module = __import__('servers.%s' % srv, fromlist=["servers.%s" % srv]) module = __import__('servers.%s' % srv, fromlist=["servers.%s" % srv])
page_url = server.url page_url = server.url
print 'testing ' + page_url print 'testing ' + page_url
self.assert_(hasattr(module, 'test_video_exists'), srv + ' has no test_video_exists')
if module.test_video_exists(page_url)[0]: if module.test_video_exists(page_url)[0]:
urls = module.get_video_url(page_url) urls = module.get_video_url(page_url)
server_parameters = servertools.get_server_parameters(srv)
self.assertTrue(urls or server_parameters.get("premium"), srv + ' scraper did not return direct urls for ' + page_url)
print urls print urls
for u in urls: for u in urls:
spl = u[1].split('|') spl = u[1].split('|')
@@ -199,11 +210,17 @@ class GenericChannelTest(unittest.TestCase):
if headersUrl: if headersUrl:
for name in headersUrl.split('&'): for name in headersUrl.split('&'):
h, v = name.split('=') h, v = name.split('=')
headers[h] = v h = str(h)
headers[h] = str(v)
print headers print headers
contentType = requests.head(directUrl, headers=headers, timeout=15).headers['Content-Type'] if 'magnet:?' in directUrl: # check of magnet links not supported
self.assert_(contentType.startswith('video') or 'mpegurl' in contentType, continue
srv + ' scraper did not return valid url for link ' + page_url) page = downloadpage(directUrl, headers=headers, only_headers=True, use_requests=True)
self.assertTrue(page.success, srv + ' scraper returned an invalid link')
self.assertLess(page.code, 400, srv + ' scraper returned a ' + str(page.code) + ' link')
contentType = page.headers['Content-Type']
self.assert_(contentType.startswith('video') or 'mpegurl' in contentType or 'octet-stream' in contentType or 'dash+xml' in contentType,
srv + ' scraper did not return valid url for link ' + page_url + '\nDirect url: ' + directUrl + '\nContent-Type: ' + contentType)
self.assertTrue(hasChannelConfig, 'channel ' + self.ch + ' has no channel config') self.assertTrue(hasChannelConfig, 'channel ' + self.ch + ' has no channel config')
@@ -215,5 +232,6 @@ class GenericChannelTest(unittest.TestCase):
self.assertTrue(itemlist, 'channel ' + self.ch + ' returned no news for category ' + cat) self.assertTrue(itemlist, 'channel ' + self.ch + ' returned no news for category ' + cat)
break break
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()