From fce3bf259006d90a981b22fa3b84a84d8546fc2c Mon Sep 17 00:00:00 2001 From: Alhaziel Date: Wed, 15 Jan 2020 18:44:03 +0100 Subject: [PATCH] Migliorato support.match e sostituito ove usato - Fix Animeforce - Fix AnimeSubITA --- channels/animeforce.py | 143 +++++++++++++++++++++------------- channels/animeleggendari.py | 6 +- channels/animesaturn.py | 13 ++-- channels/animesubita.py | 52 ++++++------- channels/animetubeita.py | 4 +- channels/animeworld.py | 16 ++-- channels/casacinema.py | 6 +- channels/cb01anime.py | 12 +-- channels/cinetecadibologna.py | 2 +- channels/dreamsub.py | 25 +++--- channels/fastsubita.py | 6 +- channels/piratestreaming.py | 2 +- channels/seriehd.py | 6 +- channels/serietvsubita.py | 14 ++-- channels/serietvu.py | 2 +- channels/streamtime.py | 6 +- channels/tantifilm.py | 8 +- channels/toonitalia.py | 2 +- channels/vvvvid.py | 6 +- core/support.py | 97 ++++++++++++++++++----- specials/community.py | 8 +- 21 files changed, 271 insertions(+), 165 deletions(-) diff --git a/channels/animeforce.py b/channels/animeforce.py index cf2dccfc..565e395c 100644 --- a/channels/animeforce.py +++ b/channels/animeforce.py @@ -3,14 +3,11 @@ # Canale per AnimeForce # ------------------------------------------------------------ -from servers.decrypters import adfly from core import support host = support.config.get_channel_url() -IDIOMAS = {'Italiano': 'IT'} -list_language = IDIOMAS.values() -list_servers = ['directo', 'openload', 'vvvvid'] +list_servers = ['directo', 'vvvvid'] list_quality = ['default'] @@ -45,47 +42,40 @@ def newest(categoria): return itemlist -@support.scrape def search(item, texto): - # debug = True - search = texto + support.log(texto) + item.args = 'noorder' + item.url = host + '/?s=' + texto + '&cat=6010' item.contentType = 'tvshow' - patron = r'\s*]+>(?P[^<]+)<' - action = 'episodios' - return locals() + try: + return peliculas(item) + # Continua la ricerca in caso di errore + except: + import sys + for line in sys.exc_info(): + support.logger.error("%s" % line) + return [] @support.scrape def peliculas(item): anime = True action = 'episodios' - - if item.args == 'newest': - patron = r'<a href="(?P<url>[^"]+)">\s*<img src="(?P<thumb>[^"]+)" alt="(?P<title>.*?)(?: Sub| sub| SUB|")' - action = 'findvideos' - - elif item.args == 'last': - patron = r'<a href="(?P<url>[^"]+)">\s*<img src="(?P<thumb>[^"]+)" alt="(?P<title>.*?)(?: Sub| sub| SUB|")' - + if not item.args: + pagination = '' + patron = r'<a\s*href="(?P<url>[^"]+)"\s*title="(?P<title>[^"]+)">' elif item.args == 'corso': pagination = '' patron = r'<strong><a href="(?P<url>[^"]+)">(?P<title>.*?) [Ss][Uu][Bb]' else: - pagination = '' - patron = r'<a href="(?P<url>[^"]+)">\s*<strong[^>]+>(?P<title>[^<]+)<' + patron = r'<a href="(?P<url>[^"]+)"[^>]+>\s*<img src="(?P<thumb>[^"]+)" alt="(?P<title>.*?)(?: Sub| sub| SUB|")' + + if item.args == 'newest': item.action = 'findvideos' def itemHook(item): if 'sub-ita' in item.url: if item.args != 'newest': item.title = item.title + support.typo('Sub-ITA','_ [] color kod') item.contentLanguage = 'Sub-ITA' - if item.args == 'newest': - url = support.match(item, '<a href="([^"]+)" title="[^"]+" target="[^"]+" class="btn', headers=headers)[0] - item.url = url[0] if url else '' - delete = support.scrapertools.find_single_match(item.fulltitle, r'( Episodi.*)') - episode = support.scrapertools.find_single_match(item.title, r'Episodi(?:o)? 
(?:\d+÷)?(\d+)') - item.title = support.typo(episode + ' - ','bold') + item.title.replace(delete,'') - item.fulltitle = item.show = item.title.replace(delete,'') - item.episode = episode return item return locals() @@ -94,9 +84,15 @@ def peliculas(item): @support.scrape def episodios(item): anime = True - patron = r'<td style[^>]+>\s*.*?(?:<span[^>]+)?<strong>(?P<title>[^<]+)<\/strong>.*?<td style[^>]+>\s*<a href="(?P<url>[^"]+)"[^>]+>' + data = support.match(item, headers=headers).data + if '<h6>Streaming</h6>' in data: + patron = r'<td style[^>]+>\s*.*?(?:<span[^>]+)?<strong>(?P<title>[^<]+)<\/strong>.*?<td style[^>]+>\s*<a href="(?P<url>[^"]+)"[^>]+>' + else: + patron = r'<a\s*href="(?P<url>[^"]+)"\s*title="(?P<title>[^"]+)"\s*class="btn btn-dark mb-1">' def itemHook(item): - item.url = item.url.replace(host, '') + support.log(item) + if item.url.startswith('//'): item.url= 'https:' + item.url + elif item.url.startswith('/'): item.url= 'https:/' + item.url return item action = 'findvideos' return locals() @@ -104,38 +100,81 @@ def episodios(item): def findvideos(item): support.log(item) - + # try: + # from urlparse import urljoin + # except: + # from urllib.parse import urljoin + # support.dbg() itemlist = [] - - if item.episode: - from lib import unshortenit - url, c = unshortenit.unshorten(item.url) - url = support.match(item, r'<a href="([^"]+)"[^>]*>', patronBlock=r'Episodio %s(.*?)</tr>' % item.episode ,url=url)[0] - item.url = url[0] if url else '' - if 'vvvvid' in item.url: - item.action = 'play' - itemlist.append(item) + import requests + from lib import vvvvid_decoder + + if support.match(item.url, string=True, patron=r'(\d+/\d+)').match: + item.action = 'play' + itemlist.append(item) + else: + # VVVVID vars + vvvvid_host = 'https://www.vvvvid.it/vvvvid/ondemand/' + vvvvid_headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0'} - if 'http' not in item.url: - if '//' in item.url[:2]: - item.url = 'http:' + item.url - elif host not in item.url: - item.url = host + item.url + # VVVVID session + current_session = requests.Session() + login_page = 'https://www.vvvvid.it/user/login' + conn_id = current_session.get(login_page, headers=vvvvid_headers).json()['data']['conn_id'] + payload = {'conn_id': conn_id} + + + # collect parameters + show_id = support.match(item.url, string=True, patron=r'(\d+)').match + ep_number = support.match(item.title, patron=r'(\d+)').match + json_file = current_session.get(vvvvid_host + show_id + '/seasons/', headers=vvvvid_headers, params=payload).json() + season_id = str(json_file['data'][0]['season_id']) + json_file = current_session.get(vvvvid_host + show_id + '/season/' + season_id +'/', headers=vvvvid_headers, params=payload).json() + + # select the correct episode + for episode in json_file['data']: + support.log('Number',int(episode['number']),int(ep_number)) + if int(episode['number']) == int(ep_number): + url = vvvvid_decoder.dec_ei(episode['embed_info'] or episode['embed_info']) + if 'youtube' in url: item.url = url + item.url = url.replace('manifest.f4m','master.m3u8').replace('http://','https://').replace('/z/','/i/') + if 'https' not in item.url: + url = support.match(item, url='https://or01.top-ix.org/videomg/_definst_/mp4:' + item.url + '/playlist.m3u')[1] + url = url.split()[-1] + itemlist.append( + support.Item(action= 'play', + url= 'https://or01.top-ix.org/videomg/_definst_/mp4:' + item.url + '/' + url, + server= 'directo')) + + elif 'adf.ly' in item.url: + from servers.decrypters import adfly + 
url = adfly.get_long_url(item.url) - if 'adf.ly' in item.url: - item.url = adfly.get_long_url(item.url) elif 'bit.ly' in item.url: - item.url = support.httptools.downloadpage(item.url, only_headers=True, follow_redirects=False).headers.get("location") + url = support.httptools.downloadpage(item.url, only_headers=True, follow_redirects=False).headers.get("location") + + else: + url = host + for u in item.url.split('/'): + # support.log(i) + if u and 'animeforce' not in u and 'http' not in u: + url += '/' + u + + if 'php?' in url: + url = support.httptools.downloadpage(url, only_headers=True, follow_redirects=False).headers.get("location") + url = support.match(url, patron=r'class="button"><a href=(?:")?([^" ]+)', headers=headers).match + else: + url = support.match(url, patron=[r'<source src=(?:")?([^" ]+)',r'name="_wp_http_referer" value="([^"]+)"']).match + if url.startswith('//'): url = 'https:' + url + elif url.startswith('/'): url = 'https:/' + url - matches = support.match(item, r'button"><a href="([^"]+)"')[0] - for video in matches: itemlist.append( support.Item(channel=item.channel, action="play", - title='diretto', - url=video, + title='Diretto', + url=url, server='directo')) return support.server(item, itemlist=itemlist) diff --git a/channels/animeleggendari.py b/channels/animeleggendari.py index f3fd8b00..3e814de3 100644 --- a/channels/animeleggendari.py +++ b/channels/animeleggendari.py @@ -74,7 +74,7 @@ def peliculas(item): @support.scrape def episodios(item): - data = support.match(item, headers=headers)[1] + data = support.match(item, headers=headers).data if not any(x in data for x in ['Lista Episodi', 'Movie Parte']): support.log('NOT IN DATA') patron = r'(?:iframe src|str)="(?P<url>[^"]+)"' @@ -107,7 +107,7 @@ def episodios(item): return locals() def check(item): - data = support.match(item, headers=headers)[1] + data = support.match(item, headers=headers).data if 'Lista Episodi' not in data: item.data = data return findvideos(item) @@ -120,7 +120,7 @@ def findvideos(item): if item.data: data = item.data else: - matches = support.match(item, '(?:str="([^"]+)"|iframe src="([^"]+)")')[0] + matches = support.match(item, patron=r'(?:str="([^"]+)"|iframe src="([^"]+)")').matches data = '' if matches: for match in matches: diff --git a/channels/animesaturn.py b/channels/animesaturn.py index fd51ec44..e07d0de4 100644 --- a/channels/animesaturn.py +++ b/channels/animesaturn.py @@ -10,8 +10,7 @@ __channel__ = "animesaturn" host = support.config.get_setting("channel_host", __channel__) headers={'X-Requested-With': 'XMLHttpRequest'} -IDIOMAS = {'Italiano': 'ITA'} -list_language = IDIOMAS.values() + list_servers = ['openload', 'fembed', 'animeworld'] list_quality = ['default', '480p', '720p', '1080p'] @@ -67,7 +66,7 @@ def peliculas(item): deflang= 'Sub-ITA' if item.args == 'updated': post = "page=" + str(item.page if item.page else 1) if item.page > 1 else None - page, data = support.match(item, r'data-page="(\d+)" title="Next">', post=post, headers=headers) + page= support.match(item, patron=r'data-page="(\d+)" title="Next">', post=post, headers=headers).match patron = r'<img alt="[^"]+" src="(?P<thumb>[^"]+)" [^>]+></div></a>\s*<a href="(?P<url>[^"]+)"><div class="testo">(?P<title>[^\(<]+)(?:(?P<lang>\(([^\)]+)\)))?</div></a>\s*<a href="[^"]+"><div class="testo2">[^\d]+(?P<episode>\d+)</div></a>' if page: nextpage = page item.contentType='episode' @@ -85,8 +84,8 @@ def peliculas(item): def check(item): - movie, data = support.match(item, r'Episodi:</b> (\d*) Movie') - anime_id = 
support.match(data, r'anime_id=(\d+)')[0][0] + movie = support.match(item, patron=r'Episodi:</b> (\d*) Movie') + anime_id = support.match(movie.data, patron=r'anime_id=(\d+)').match item.url = host + "/loading_anime?anime_id=" + anime_id if movie: item.contentType = 'movie' @@ -108,9 +107,9 @@ def episodios(item): def findvideos(item): support.log() itemlist = [] - urls = support.match(item, r'<a href="([^"]+)"><div class="downloadestreaming">', headers=headers)[0] + urls = support.match(item, patron=r'<a href="([^"]+)"><div class="downloadestreaming">', headers=headers, debug=True).matches if urls: - links = support.match(item, r'(?:<source type="[^"]+"\s*src=|file:\s*)"([^"]+)"', url=urls[0], headers=headers)[0] + links = support.match(urls[0], patron=r'(?:<source type="[^"]+"\s*src=|file:\s*)"([^"]+)"', headers=headers).matches for link in links: itemlist.append( support.Item(channel=item.channel, diff --git a/channels/animesubita.py b/channels/animesubita.py index 488f06d8..0df2dc84 100644 --- a/channels/animesubita.py +++ b/channels/animesubita.py @@ -34,7 +34,7 @@ def newest(categoria): if itemlist[-1].action == "ultimiep": itemlist.pop() - # Continua l'esecuzione in caso di errore + # Continua l'esecuzione in caso di errore except: import sys for line in sys.exc_info(): @@ -50,7 +50,7 @@ def search(item, texto): item.args = 'alt' try: return peliculas(item) - # Continua la ricerca in caso di errore + # Continua la ricerca in caso di errore except: import sys for line in sys.exc_info(): @@ -67,10 +67,9 @@ def genres(item): @support.scrape -def peliculas(item): +def peliculas(item): anime = True if item.args == 'updated': - #patron = r'<div class="post-thumbnail">\s*<a href="(?P<url>[^"]+)" title="(?P<title>.*?)\s*(?P<episode>Episodio \d+)[^"]+"[^>]*>\s*<img[^src]+src="(?P<thumb>[^"]+)"' patron = r'<div class="post-thumbnail">\s*<a href="(?P<url>[^"]+)" title="(?P<title>.*?)\s*Episodio (?P<episode>\d+) (?P<lang>[a-zA-Z-\s]+)[^"]*"> <img[^src]+src="(?P<thumb>[^"]+)"' patronNext = r'<link rel="next" href="([^"]+)"\s*/>' action = 'findvideos' @@ -98,32 +97,27 @@ def findvideos(item): itemlist = [] if item.args == 'updated': - ep = support.match(item.fulltitle,r'(Episodio\s*\d+)')[0][0] - item.url = support.re.sub(r'episodio-\d+-|oav-\d+-', '',item.url) + ep = support.match(item.fulltitle, patron=r'(\d+)').match + item.url = support.re.sub(r'episodio-\d+-|oav-\d+-'+ep, '',item.url) if 'streaming' not in item.url: item.url = item.url.replace('sub-ita','sub-ita-streaming') - item.url = support.match(item, r'<a href="([^"]+)"[^>]+>', ep + '(.*?)</tr>', )[0][0] + item.url = support.match(item, patron= ep + r'[^>]+>[^>]+>[^>]+><a href="([^"]+)"').match - urls = support.match(item.url, r'(episodio\d*.php.*)')[0] - for url in urls: - url = host + '/' + url - headers['Referer'] = url - data = support.match(item, headers=headers, url=url)[1] - cookies = "" - matches = support.re.compile('(.%s.*?)\n' % host.replace("http://", "").replace("www.", ""), support.re.DOTALL).findall(support.config.get_cookie_data()) - for cookie in matches: - cookies += cookie.split('\t')[5] + "=" + cookie.split('\t')[6] + ";" + # post + url = host + '/' + support.match(item.url, patron=r'(episodio\d*.php.*?)"').match.replace('%3F','?').replace('%3D','=') + headers['Referer'] = url + cookies = "" + matches = support.re.compile('(.%s.*?)\n' % host.replace("http://", "").replace("www.", ""), support.re.DOTALL).findall(support.config.get_cookie_data()) + for cookie in matches: + cookies += cookie.split('\t')[5] + "=" + 
cookie.split('\t')[6] + ";" + headers['Cookie'] = cookies[:-1] - headers['Cookie'] = cookies[:-1] - - url = support.match(data, r'<source src="([^"]+)"[^>]+>')[0][0] + '|' + support.urllib.urlencode(headers) - itemlist.append( - support.Item(channel=item.channel, - action="play", - title='diretto', - quality='', - url=url, - server='directo', - fulltitle=item.fulltitle, - show=item.show)) + url = support.match(url, patron=r'<source src="([^"]+)"[^>]+>').match - return support.server(item,url,itemlist) + itemlist.append( + support.Item(channel=item.channel, + action="play", + title='Diretto', + url=url + '|' + support.urllib.urlencode(headers), + server='directo')) + + return support.server(item,itemlist=itemlist) \ No newline at end of file diff --git a/channels/animetubeita.py b/channels/animetubeita.py index 12c5c283..63db64e6 100644 --- a/channels/animetubeita.py +++ b/channels/animetubeita.py @@ -98,11 +98,11 @@ def episodios(item): def findvideos(item): itemlist=[] if item.args == 'last': - match = support.match(item, r'href="(?P<url>[^"]+)"[^>]+><strong>DOWNLOAD & STREAMING</strong>', url=item.url)[0] + match = support.match(item, patron=r'href="(?P<url>[^"]+)"[^>]+><strong>DOWNLOAD & STREAMING</strong>').match if match: patronBlock = r'<h6>Episodio</h6>(?P<block>.*?)(?:<!--|</table>)' patron = r'<a href="http://link\.animetubeita\.com/2361078/(?P<url>[^"]+)"' - match = support.match(item, patron, patronBlock, headers, match[0])[0] + match = support.match(match, patron=patron, patronBlock=patronBlock, headers=headers).match else: return itemlist if match: item.url = match[-1] diff --git a/channels/animeworld.py b/channels/animeworld.py index 58a5b07a..e9b26a4c 100644 --- a/channels/animeworld.py +++ b/channels/animeworld.py @@ -34,7 +34,7 @@ def mainlist(item): def genres(item): support.log() itemlist = [] - matches = support.match(item, r'<input.*?name="([^"]+)" value="([^"]+)"\s*>[^>]+>([^<]+)<\/label>' , r'<button class="btn btn-sm btn-default dropdown-toggle" data-toggle="dropdown"> Generi <span.[^>]+>(.*?)</ul>', headers=headers)[0] + matches = support.match(item, patron=r'<input.*?name="([^"]+)" value="([^"]+)"\s*>[^>]+>([^<]+)<\/label>' , patronBlock=r'<button class="btn btn-sm btn-default dropdown-toggle" data-toggle="dropdown"> Generi <span.[^>]+>(.*?)</ul>', headers=headers).matches for name, value, title in matches: support.menuItem(itemlist, __channel__, support.typo(title, 'bold'), 'peliculas', host + '/filter?' + name + '=' + value + '&sort=' + order(), 'tvshow', args='sub') return itemlist @@ -44,7 +44,7 @@ def build_menu(item): support.log() itemlist = [] support.menuItem(itemlist, __channel__, 'Tutti bold', 'peliculas', item.url , 'tvshow' , args=item.args) - matches = support.match(item,r'<button class="btn btn-sm btn-default dropdown-toggle" data-toggle="dropdown"> (.*?) <span.[^>]+>(.*?)</ul>',r'<form class="filters.*?>(.*?)</form>', headers=headers)[0] + matches = support.match(item, patron=r'<button class="btn btn-sm btn-default dropdown-toggle" data-toggle="dropdown"> (.*?) 
<span.[^>]+>(.*?)</ul>', patronBlock=r'<form class="filters.*?>(.*?)</form>', headers=headers).matches for title, html in matches: if title not in 'Lingua Ordine': support.menuItem(itemlist, __channel__, title + ' submenu bold', 'build_sub_menu', html, 'tvshow', args=item.args) @@ -127,7 +127,7 @@ def peliculas(item): def episodios(item): anime=True pagination = 50 - data = support.match(item, headers=headers)[1] + data = support.match(item, headers=headers).data if 'VVVVID' in data: patronBlock= r'<div class="server\s*active\s*"(?P<block>.*?)</ul>' else: patronBlock= r'server active(?P<block>.*?)server hidden ' patron = r'<li><a [^=]+="[^"]+"[^=]+="[^"]+"[^=]+="[^"]+"[^=]+="[^"]+"[^=]+="[^"]+" href="(?P<url>[^"]+)"[^>]+>(?P<episode>[^<]+)<' @@ -143,9 +143,11 @@ def findvideos(item): import time support.log(item) itemlist = [] - matches, data = support.match(item, r'class="tab.*?data-name="([0-9]+)">', headers=headers) + matches = support.match(item, patron=r'class="tab.*?data-name="([0-9]+)">', headers=headers) + data = matches.data + matches = matches.matches videoData = '' - + for serverid in matches: if not item.number: item.number = support.scrapertools.find_single_match(item.title, r'(\d+) -') block = support.scrapertools.find_multiple_matches(data, 'data-id="' + serverid + '">(.*?)<div class="server') @@ -153,7 +155,7 @@ def findvideos(item): support.log('ID= ',serverid) if id: if serverid == '26': - matches = support.match(item, r'<a href="([^"]+)"', url='%s/ajax/episode/serverPlayer?id=%s' % (host, item.url.split('/')[-1]))[0] + matches = support.match('%s/ajax/episode/serverPlayer?id=%s' % (host, item.url.split('/')[-1]), patron=r'<a href="([^"]+)"', ).matches for url in matches: videoData += '\n' + url else: @@ -162,7 +164,7 @@ def findvideos(item): json = jsontools.load(dataJson) support.log(json) if 'keepsetsu' in json['grabber']: - matches = support.match(item, r'<iframe\s*src="([^"]+)"', url=json['grabber'])[0] + matches = support.match(json['grabber'], patron=r'<iframe\s*src="([^"]+)"'),matches for url in matches: videoData += '\n' + url else: diff --git a/channels/casacinema.py b/channels/casacinema.py index 083d5986..c4046df0 100644 --- a/channels/casacinema.py +++ b/channels/casacinema.py @@ -39,7 +39,7 @@ def genres(item): def select(item): - item.data = support.match(item)[1] + item.data = support.match(item).data if 'continua con il video' in item.data.lower(): support.log('select = ### è un film ###') item.contentType = 'movie' @@ -140,10 +140,10 @@ def episodios(item): def findvideos(item): if item.contentType != 'movie': - links = support.match(item.url, r'href="([^"]+)"')[0] + links = support.match(item.url, patron=r'href="([^"]+)"').matches else: matchData = item.data if item.data else item - links = support.match(matchData, r'(?:SRC|href)="([^"]+)"', patronBlock=r'<div class="col-md-10">(.+?)<div class="ads">')[0] + links = support.match(matchData, patron=r'(?:SRC|href)="([^"]+)"', patronBlock=r'<div class="col-md-10">(.+?)<div class="ads">').matches data = '' from lib.unshortenit import unshorten_only for link in links: diff --git a/channels/cb01anime.py b/channels/cb01anime.py index d08c70b2..9e892298 100644 --- a/channels/cb01anime.py +++ b/channels/cb01anime.py @@ -59,7 +59,7 @@ def peliculas(item): blacklist = Blacklist item.contentType = 'tvshow' if item.args == 'newest': - data = support.match(item)[1] + # data = support.match(item).data patron = r'<div id="blockvids"><ul><li><a href="(?P<url>[^"]+)"[^>]+><img 
src="(?P<thumb>[^"]+)"[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<title>[^\[]+)\[(?P<lang>[^\]]+)\]' else: patron = r'<div class="span4">\s*<a href="(?P<url>[^"]+)"><img src="(?P<thumb>[^"]+)"[^>]+><\/a>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> <h1>(?P<title>[^<\[]+)(?:\[(?P<lang>[^\]]+)\])?</h1></a>.*?-->(?:.*?<br />)?\s*(?P<plot>[^<]+)' @@ -68,7 +68,7 @@ def peliculas(item): return locals() def check(item): - item.url = support.match(item,r'(?:<p>|/>)(.*?)(?:<br|</td>|</p>)', r'Streaming:(.*?)</tr>')[0] + item.url = support.match(item, patron=r'(?:<p>|/>)(.*?)(?:<br|</td>|</p>)', patronBlock=r'Streaming:(.*?)</tr>').matches if 'Episodio' in str(item.url): item.contentType = 'tvshow' return episodios(item) @@ -87,14 +87,14 @@ def episodios(item): sp = 0 for match in item.url: if 'stagione' in match.lower(): - find_season = support.match(match, r'Stagione\s*(\d+)')[0] - season = int(find_season[0]) if find_season else season + 1 if 'prima' not in match.lower() else season + find_season = support.match(match, patron=r'Stagione\s*(\d+)').match + season = int(find_season) if find_season else season + 1 if 'prima' not in match.lower() else season else: - try: title = support.match(match,'<a[^>]+>([^<]+)</a>')[0][0] + try: title = support.match(match, patron=r'<a[^>]+>([^<]+)</a>').match except: title = '' if title: if 'episodio' in title.lower(): - ep = support.match(match, r'Episodio ((?:\d+.\d|\d+|\D+))')[0][0] + ep = support.match(match, patron=r'Episodio ((?:\d+.\d|\d+|\D+))').match check = ep.isdigit() if check or '.' in ep: if '.' in ep: diff --git a/channels/cinetecadibologna.py b/channels/cinetecadibologna.py index 892c3a19..9bf10c08 100644 --- a/channels/cinetecadibologna.py +++ b/channels/cinetecadibologna.py @@ -73,7 +73,7 @@ def findvideos(item): support.log() itemlist = [] - matches = support.match(item, 'filename: "(.*?)"')[0] + matches = support.match(item, patron=r'filename: "(.*?)"').matches for url in matches: itemlist.append( diff --git a/channels/dreamsub.py b/channels/dreamsub.py index 4415224e..a9c0889f 100644 --- a/channels/dreamsub.py +++ b/channels/dreamsub.py @@ -31,10 +31,11 @@ def menu(item): action = 'peliculas' patronBlock = r'<div class="filter-header"><b>%s</b>(?P<block>.*?)<div class="filter-box">' % item.args - patronMenu = r'<a class="[^"]+" data-state="[^"]+" (?P<url>[^>]+)>[^>]+></i>[^>]+></i>[^>]+></i>(?P<title>[^>]+)</a>' + patronMenu = r'<a class="[^"]+" data-state="[^"]+" (?P<other>[^>]+)>[^>]+></i>[^>]+></i>[^>]+></i>(?P<title>[^>]+)</a>' def itemHook(item): - for Type, ID in support.match(item.url, r'data-type="([^"]+)" data-id="([^"]+)"')[0]: + support.log(item.type) + for Type, ID in support.match(item.other, patron=r'data-type="([^"]+)" data-id="([^"]+)"').matches: item.url = host + '/search?' 
+ Type + 'Y=' + ID return item return locals() @@ -110,28 +111,34 @@ def findvideos(item): itemlist = [] support.log() - matches, data = support.match(item, r'<a href="([^"]+)"', r'<div style="white-space: (.*?)<div id="main-content"') + matches = support.match(item, patron=r'<a href="([^"]+)"', patronBlock=r'<div style="white-space: (.*?)<div id="main-content"') - if not matches: - item.data = data + if not matches.matches: + item.data = matches.data item.contentType = 'tvshow' return episodios(item) - matches.sort() + # matches.matches.sort() - for url in matches: + for url in matches.matches: lang = url.split('/')[-2] + if 'ita' in lang.lower(): + language = 'ITA' + if 'sub' in lang.lower(): + language = 'Sub-' + language quality = url.split('/')[-1] itemlist.append( support.Item(channel=item.channel, action="play", contentType=item.contentType, - title=lang, + title=language, url=url, - contentLanguage = lang, + contentLanguage = language, quality = quality, + order = quality.replace('p','').zfill(4), server='directo', )) + itemlist.sort(key=lambda x: (x.title, x.order), reverse=False) return support.server(item, itemlist=itemlist) \ No newline at end of file diff --git a/channels/fastsubita.py b/channels/fastsubita.py index b022c94e..b96af5e8 100644 --- a/channels/fastsubita.py +++ b/channels/fastsubita.py @@ -145,7 +145,9 @@ def findvideos(item): itemlist = [] patronBlock = '<div class="entry-content">(?P<block>.*)<footer class="entry-footer">' patron = r'<a href="([^"]+)">' - matches, data = support.match(item, patron, patronBlock, headers) + html = support.match(item, patron=patron, patronBlock=patronBlock, headers=headers) + matches = html.matches + data= html.data if item.args != 'episodios': item.infoLabels['mediatype'] = 'episode' @@ -156,7 +158,7 @@ def findvideos(item): itemlist += support.server(item, data) - data = httptools.downloadpage(item.url).data + data = support.match(item.url).data patron = r'>Posted in <a href="https?://fastsubita.com/serietv/([^/]+)/(?:[^"]+)?"' series = scrapertools.find_single_match(data, patron) titles = support.typo(series.upper().replace('-', ' '), 'bold color kod') diff --git a/channels/piratestreaming.py b/channels/piratestreaming.py index c8b797a3..92aa9418 100644 --- a/channels/piratestreaming.py +++ b/channels/piratestreaming.py @@ -99,7 +99,7 @@ def findvideos(item): if item.contentType == 'episode': data = item.url else: - data = support.match(item)[1] + data = support.match(item).data if 'link-episode' in data: item.data = data return episodios(item) diff --git a/channels/seriehd.py b/channels/seriehd.py index a60c691e..95536858 100644 --- a/channels/seriehd.py +++ b/channels/seriehd.py @@ -34,11 +34,11 @@ def peliculas(item): @support.scrape def episodios(item): data ='' - url = support.match(item, patronBlock=r'<iframe width=".+?" height=".+?" src="([^"]+)" allowfullscreen frameborder="0">')[1] - seasons = support.match(item, r'<a href="([^"]+)">(\d+)<', r'<h3>STAGIONE</h3><ul>(.*?)</ul>', headers, url)[0] + url = support.match(item, patron=r'<iframe width=".+?" height=".+?" 
src="([^"]+)" allowfullscreen frameborder="0">').match + seasons = support.match(url, patron=r'<a href="([^"]+)">(\d+)<', patronBlock=r'<h3>STAGIONE</h3><ul>(.*?)</ul>', headers=headers).matches for season_url, season in seasons: season_url = support.urlparse.urljoin(url, season_url) - episodes = support.match(item, r'<a href="([^"]+)">(\d+)<', '<h3>EPISODIO</h3><ul>(.*?)</ul>', headers, season_url)[0] + episodes = support.match(season_url, patron=r'<a href="([^"]+)">(\d+)<', patronBlock=r'<h3>EPISODIO</h3><ul>(.*?)</ul>', headers=headers).matches for episode_url, episode in episodes: episode_url = support.urlparse.urljoin(url, episode_url) title = season + "x" + episode.zfill(2) + ' - ' + item.fulltitle diff --git a/channels/serietvsubita.py b/channels/serietvsubita.py index 1345fffa..47cb8850 100644 --- a/channels/serietvsubita.py +++ b/channels/serietvsubita.py @@ -108,7 +108,7 @@ def lista_serie(item): else: # Extrae las entradas patron = r'<li class="cat-item cat-item-\d+"><a href="([^"]+)"\s?>([^<]+)</a>' - matches = support.match(item, patron, headers=headers)[0] + matches = support.match(item, patron=patron, headers=headers).matches for i, (scrapedurl, scrapedtitle) in enumerate(matches): scrapedplot = "" scrapedthumbnail = "" @@ -148,7 +148,9 @@ def episodios(item, itemlist=[]): patron += r'<p><a href="([^"]+)">' - matches, data = support.match(item, patron, headers=headers) + html = support.match(item, patron=patron, headers=headers) + matches = html.matches + data = html.data for scrapedurl, scrapedtitle, scrapedthumbnail in matches: scrapedplot = "" @@ -224,7 +226,9 @@ def peliculas_tv(item): patron = '<div class="post-meta">\s*<a href="([^"]+)"\s*title="([^"]+)"\s*class=".*?"></a>' - matches, data = support.match(item, patron, headers=headers) + html = support.match(item, patron=patron, headers=headers) + matches = html.matches + data = html.data for scrapedurl, scrapedtitle in matches: if scrapedtitle in ["FACEBOOK", "RAPIDGATOR", "WELCOME!"]: @@ -298,7 +302,7 @@ def search(item, texto): itemlist = [] patron = '<li class="cat-item cat-item-\d+"><a href="([^"]+)"\s?>([^<]+)</a>' - matches = support.match(item, patron, headers=headers)[0] + matches = support.match(item, patron=patron, headers=headers).matches for i, (scrapedurl, scrapedtitle) in enumerate(matches): if texto.upper() in scrapedtitle.upper(): scrapedthumbnail = "" @@ -333,7 +337,7 @@ def list_az(item): alphabet = dict() patron = '<li class="cat-item cat-item-\d+"><a href="([^"]+)"\s?>([^<]+)</a>' - matches = support.match(item, patron, headers=headers)[0] + matches = support.match(item, patron=patron, headers=headers).matches for i, (scrapedurl, scrapedtitle) in enumerate(matches): letter = scrapedtitle[0].upper() if letter not in alphabet: diff --git a/channels/serietvu.py b/channels/serietvu.py index c4c9bd2f..f3878b99 100644 --- a/channels/serietvu.py +++ b/channels/serietvu.py @@ -50,7 +50,7 @@ def peliculas(item): @support.scrape def episodios(item): - seasons, data = support.match(item, r'<option value="(\d+)"[^>]*>\D+(\d+)') + seasons = support.match(item, patron=r'<option value="(\d+)"[^>]*>\D+(\d+)').matches patronBlock = r'</select><div style="clear:both"></div></h2>(?P<block>.*?)<div id="trailer" class="tab">' patron = r'(?:<div class="list (?:active)?" 
data-id="(?P<season>\d+)">[^>]+>)?\s*<a data-id="(?P<episode>\d+)(?:[ ](?P<lang>[SuUbBiItTaA\-]+))?"(?P<url>[^>]+)>[^>]+>[^>]+>(?P<title>.+?)(?:\sSub-ITA)?<' def itemHook(item): diff --git a/channels/streamtime.py b/channels/streamtime.py index 3aad1040..e32056f3 100644 --- a/channels/streamtime.py +++ b/channels/streamtime.py @@ -149,15 +149,15 @@ def findvideos(item): id = item.args['id'] season = str(item.args['season']) episode = str(item.args['episode']) - res = support.match(item, 'src="([^"]+)"[^>]*></video>', url=url, headers=[['Referer', domain]]) + res = support.match(url, patron='src="([^"]+)"[^>]*></video>', headers=[['Referer', domain]]).match itemlist = [] - if res[0]: + if res: itemlist.append( Item(channel=item.channel, action="play", title='contentful', - url=res[0][0], + url=res, server='directo', fulltitle=item.fulltitle, thumbnail=item.thumbnail, diff --git a/channels/tantifilm.py b/channels/tantifilm.py index 9b17b2d8..6582b3d9 100644 --- a/channels/tantifilm.py +++ b/channels/tantifilm.py @@ -124,7 +124,7 @@ def anime(item): log() itemlist = [] - seasons = support.match(item, r'<div class="sp-body[^"]+">(.*?)<\/div>')[0] + seasons = support.match(item, patron=r'<div class="sp-body[^"]+">(.*?)<\/div>').matches for season in seasons: episodes = scrapertools.find_multiple_matches(season, r'<a.*?href="([^"]+)"[^>]+>([^<]+)<\/a>(.*?)<(:?br|\/p)') for url, title, urls, none in episodes: @@ -208,7 +208,7 @@ def newest(categoria): item = Item() item.url = host +'/aggiornamenti/' - matches = support.match(item, r'mediaWrapAlt recomended_videos"[^>]+>\s*<a href="([^"]+)" title="([^"]+)" rel="bookmark">\s*<img[^s]+src="([^"]+)"[^>]+>')[0] + matches = support.match(item, patron=r'mediaWrapAlt recomended_videos"[^>]+>\s*<a href="([^"]+)" title="([^"]+)" rel="bookmark">\s*<img[^s]+src="([^"]+)"[^>]+>').matches for url, title, thumb in matches: title = scrapertools.decodeHtmlentities(title).replace("Permalink to ", "").replace("streaming", "") @@ -236,11 +236,11 @@ def findvideos(item): ## data = item.url ## else: ## data = httptools.downloadpage(item.url, headers=headers).data - data = httptools.downloadpage(item.url, headers=headers).data + data = support.match(item.url, headers=headers).data data = re.sub('\n|\t', ' ', data) data = re.sub(r'>\s+<', '> <', data) - check = scrapertools.find_single_match(data, r'<div class="category-film">\s+<h3>\s+(.*?)\s+</h3>\s+</div>') + check = support.match(data, patron=r'<div class="category-film">\s+<h3>\s+(.*?)\s+</h3>\s+</div>').match if 'sub' in check.lower(): item.contentLanguage = 'Sub-ITA' support.log("CHECK : ", check) diff --git a/channels/toonitalia.py b/channels/toonitalia.py index 7736f26d..5f6dbef2 100644 --- a/channels/toonitalia.py +++ b/channels/toonitalia.py @@ -104,7 +104,7 @@ def peliculas(item): @support.scrape def episodios(item): anime = True - data = support.match(item, headers=headers)[1] + data = support.match(item, headers=headers).data if 'https://vcrypt.net' in data: patron = r'(?:<br /> |<p>)(?P<title>[^<]+)<a href="(?P<url>[^"]+)"' else: diff --git a/channels/vvvvid.py b/channels/vvvvid.py index 7c4975ef..02e8dce1 100644 --- a/channels/vvvvid.py +++ b/channels/vvvvid.py @@ -131,7 +131,7 @@ def peliculas(item): elif '=' in item.args: json_file = current_session.get(item.url + 'channels', headers=headers, params=payload).json() - Filter = support.match(item.args,r'\?([^=]+)=')[0][0] + Filter = support.match(item.args, patron=r'\?([^=]+)=').match keys = [i[Filter] for i in json_file['data'] if Filter in i][0] for 
key in keys: if key not in ['1','2']: @@ -162,7 +162,7 @@ def episodios(item): for episode in episodes: for key in episode: if 'stagione' in key['title'].encode('utf8').lower(): - match = support.match(key['title'].encode('utf8'), r'[Ss]tagione\s*(\d+) - [Ee]pisodio\s*(\d+)')[0][0] + match = support.match(key['title'].encode('utf8'), patron=r'[Ss]tagione\s*(\d+) - [Ee]pisodio\s*(\d+)').match title = match[0]+'x'+match[1] + ' - ' + item.fulltitle make_item = True elif int(key['season_id']) == int(season_id): @@ -206,7 +206,7 @@ def findvideos(item): if 'youtube' in url: item.url = url item.url = url.replace('manifest.f4m','master.m3u8').replace('http://','https://').replace('/z/','/i/') if 'https' not in item.url: - url = support.match(item, url='https://or01.top-ix.org/videomg/_definst_/mp4:' + item.url + '/playlist.m3u')[1] + url = support.match('https://or01.top-ix.org/videomg/_definst_/mp4:' + item.url + '/playlist.m3u') url = url.split()[-1] itemlist.append( Item(action= 'play', diff --git a/core/support.py b/core/support.py index bb2870e5..6e683675 100755 --- a/core/support.py +++ b/core/support.py @@ -180,7 +180,7 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t if debug: regexDbg(item, patron, headers, block) - known_keys = ['url', 'title', 'title2', 'season', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang'] + known_keys = ['url', 'title', 'title2', 'season', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang', 'other'] # Legenda known_keys per i groups nei patron # known_keys = ['url', 'title', 'title2', 'season', 'episode', 'thumb', 'quality', # 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang'] @@ -301,7 +301,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t contentTitle= scraped['title'] if item.contentType or CT == 'movie' else '', contentLanguage = lang1, contentEpisodeNumber=episode if episode else '', - news= item.news if item.news else '' + news= item.news if item.news else '', + other = scraped['other'] if scraped['other'] else '' ) for lg in list(set(listGroups).difference(known_keys)): @@ -445,7 +446,7 @@ def scrape(func): if anime: if function == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold') else: autorenumber.renumber(itemlist) - if anime and autorenumber.check(item) == False and not scrapertools.find_single_match(itemlist[0].title, r'(\d+.\d+)'): + if anime and autorenumber.check(item) == False and len(itemlist)>0 and not scrapertools.find_single_match(itemlist[0].title, r'(\d+.\d+)'): pass else: if addVideolibrary and (item.infoLabels["title"] or item.fulltitle): @@ -772,31 +773,91 @@ def typo(string, typography=''): return string -def match(item, patron='', patronBlock='', headers='', url='', post=''): +def match(item_url_string, **args): + ''' + match is a function that combines httptools and scraper tools: + ''' + log(item_url_string) + matches = [] - if type(item) == str: - data = item + url = None + # arguments allowed for scrape + patron = args.get('patron', None) + patronBlock = args.get('patronBlock', None) + patronBlocks = args.get('patronBlock', None) + debug = args.get('debug', False) + debugBlock = args.get('debugBlock', False) + string = args.get('string', False) + # remove scrape arguments + args = dict([(key, val) for key, val in args.items() if key not in ['patron', 'patronBlock', 'patronBlocks', 'debug', 'debugBlock', 'string']]) + # dbg() 
+ # check type of item_url_string + if type(item_url_string) == str: + if item_url_string.startswith('http') and not string: url = item_url_string + else : data = item_url_string else: - url = url if url else item.url - if post: - data = httptools.downloadpage(url, headers=headers, ignore_response_code=True, post=post).data.replace("'", '"') - else: - data = httptools.downloadpage(url, headers=headers, ignore_response_code=True).data.replace("'", '"') + # if item_url_string is an item use item.url as url + url = item_url_string.url + + # if there is a url, download the page + if url: + if args.get('ignore_response_code', None) is None: + args['ignore_response_code'] = True + data = httptools.downloadpage(url, **args).data.replace("'", '"') + + # format page data data = re.sub(r'\n|\t', ' ', data) data = re.sub(r'>\s\s*<', '><', data) - log('DATA= ', data) + # collect blocks of a page if patronBlock: - block = scrapertools.find_single_match(data, patronBlock) - log('BLOCK= ',block) + blocks = [scrapertools.find_single_match(data, patronBlock)] + elif patronBlocks: + blocks = scrapertools.find_multiple_matches(data, patronBlock) else: - block = data + blocks = [data] + # match if patron: - matches = scrapertools.find_multiple_matches(block, patron) - log('MATCHES= ',matches) + if type(patron) == str: patron = [patron] + for b in blocks: + for p in patron: + matches += scrapertools.find_multiple_matches(b, p) - return matches, block + # debug mode + if config.dev_mode(): + if debugBlock: + match_dbg(data, patronBlock) + if debug: + for block in blocks: + for p in patron: + match_dbg(block, p) + + # create a item + item = Item(data=data, + blocks=blocks, + block=blocks[0] if len(blocks) > 0 else '', + matches=matches, + match=matches[0] if len(matches) > 0 else '') + + return item + + +def match_dbg(data, patron): + import json, urllib2, webbrowser + url = 'https://regex101.com' + headers = {'content-type': 'application/json'} + data = { + 'regex': patron, + 'flags': 'gm', + 'testString': data, + 'delimiter': '"""', + 'flavor': 'python' + } + r = urllib2.Request(url + '/api/regex', json.dumps(data, encoding='latin1'), headers=headers) + r = urllib2.urlopen(r).read() + permaLink = json.loads(r)['permalinkFragment'] + webbrowser.open(url + "/r/" + permaLink) def download(itemlist, item, typography='', function_level=1, function=''): diff --git a/specials/community.py b/specials/community.py index 9dacf7f6..4eaf0532 100644 --- a/specials/community.py +++ b/specials/community.py @@ -547,11 +547,9 @@ def episodios(item): if pagination and i >= pag * pagination: break # pagination match = [] if episode.has_key('number'): - match = support.match(episode['number'], r'(?P<season>\d+)x(?P<episode>\d+)')[0] - if match: - match = match[0] + match = support.match(episode['number'], patron=r'(?P<season>\d+)x(?P<episode>\d+)').match if not match and episode.has_key('title'): - match = support.match(episode['title'], r'(?P<season>\d+)x(?P<episode>\d+)')[0] + match = support.match(episode['title'], patron=r'(?P<season>\d+)x(?P<episode>\d+)').match if match: match = match[0] if match: episode_number = match[1] @@ -561,7 +559,7 @@ def episodios(item): season_number = episode['season'] if episode.has_key('season') else season if season else 1 episode_number = episode['number'] if episode.has_key('number') else '' if not episode_number.isdigit(): - episode_number = support.match(episode['title'], r'(?P<episode>\d+)')[0][0] + episode_number = support.match(episode['title'], patron=r'(?P<episode>\d+)').match ep = 
int(episode_number) if episode_number else ep if not episode_number: episode_number = str(ep).zfill(2)
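
The core/support.py hunks above change support.match from returning a (matches, data) tuple to returning an Item whose attributes carry the scrape results (data, blocks, block, matches, match), with patron accepting a string or a list and any remaining keyword arguments forwarded to httptools.downloadpage. A minimal usage sketch of that call convention, assuming only what the patch shows; the host URL and the regexes here are illustrative placeholders, not part of the change:

    # Sketch of the reworked support.match (attribute names taken from the patch;
    # 'host' and the patterns are hypothetical examples).
    from core import support

    host = 'https://example-channel.tld'   # hypothetical channel host

    # Pass an Item (its .url is downloaded) or a URL string; extra kwargs such as
    # headers are forwarded to httptools.downloadpage.
    result = support.match(host + '/archive',
                           patron=r'<a href="([^"]+)" title="([^"]+)"',
                           patronBlock=r'<div class="list">(.*?)</div>',
                           headers=[['Referer', host]])

    for url, title in result.matches:      # all regex hits found inside the block
        support.log(title, url)

    first = result.match                   # first hit, or '' when nothing matched
    page_html = result.data                # full downloaded page, for further parsing

    # Pass a plain string instead of a URL; string=True forces it to be treated as
    # data even when it starts with 'http' (as animeforce.findvideos does above).
    part = support.match(host + '/ep/2/12', string=True, patron=r'(\d+/\d+)').match

Returning a single object this way lets callers pick .match, .matches or .data by name instead of unpacking the old tuple with [0]/[1] indexes, which is what most of the per-channel hunks in this patch are replacing.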