From 8459bd6dc7eee4e7ef7850fd9dda6bbe1af4aa44 Mon Sep 17 00:00:00 2001 From: marco <10120390+mac12m99@users.noreply.github.com> Date: Mon, 16 Nov 2020 16:19:03 +0100 Subject: [PATCH] fix fastsubita, migliorie support e piccole modifiche --- channels.json | 2 +- channels/altadefinizione01.py | 2 +- channels/animeleggendari.py | 2 +- channels/casacinema.py | 4 +- channels/cb01anime.py | 2 +- channels/dreamsub.py | 2 +- channels/fastsubita.py | 129 ++++++++++++-------- core/support.py | 214 ++++++++++++++++++---------------- servers/streamtape.py | 3 - service.py | 2 + 10 files changed, 203 insertions(+), 159 deletions(-) diff --git a/channels.json b/channels.json index 5fd0a393..fd24875f 100644 --- a/channels.json +++ b/channels.json @@ -30,7 +30,7 @@ "cinetecadibologna": "http://cinestore.cinetecadibologna.it", "dreamsub": "https://dreamsub.stream", "dsda": "https://www.dsda.press", - "fastsubita": "https://fastsubita.uno", + "fastsubita": "https://fastsubita.xyz", "filmgratis": "https://www.filmaltadefinizione.co", "filmigratis": "https://filmigratis.org", "filmsenzalimiticc": "https://www.filmsenzalimiti01.online", diff --git a/channels/altadefinizione01.py b/channels/altadefinizione01.py index 2d5c351c..171f0e28 100644 --- a/channels/altadefinizione01.py +++ b/channels/altadefinizione01.py @@ -51,7 +51,7 @@ def peliculas(item): ## deflang = 'ITA' action="findvideos" - patron = r'

\s*(?P[^<]+).*?src="(?P<thumb>[^"]+).*?<div class="trdublaj">(?P<quality>[^<]+).*?<span class="ml-label">(?P<year>[0-9]+).*?<span class="ml-label">(?P<duration>[^<]+).*?<p>(?P<plot>[^<]+)' + patron = r'<div class="cover boxcaption"> +<h2>\s*<a href="(?P<url>[^"]+)">(?P<title>[^<]+).*?src="(?P<thumb>[^"]+).*?<div class="trdublaj">(?P<quality>[^<]+).*?<span class="ml-label">(?P<year>[0-9]+).*?<span class="ml-label">(?P<duration>[^<]+).*?<p>(?P<plot>[^<]+)' if item.args == "search": patronBlock = r'</script> <div class="boxgrid caption">(?P<block>.*)<div id="right_bar">' diff --git a/channels/animeleggendari.py b/channels/animeleggendari.py index 4715ae78..6b430e59 100644 --- a/channels/animeleggendari.py +++ b/channels/animeleggendari.py @@ -59,7 +59,7 @@ def peliculas(item): anime = True blacklist = ['top 10 anime da vedere'] if item.url != host: patronBlock = r'<div id="main-content(?P<block>.*?)<aside' - patron = r'<figure class="(?:mh-carousel-thumb|mh-posts-grid-thumb)"> <a (?:class="[^"]+" )?href="(?P<url>[^"]+)" title="(?P<title>.*?)(?: \((?P<year>\d+)\))? (?:(?P<lang>SUB ITA|ITA))(?: (?P<title2>[Mm][Oo][Vv][Ii][Ee]))?[^"]*"><img (?:class="[^"]+"|width="[^"]+" height="[^"]+") src="(?P<thumb>[^"]+)"[^>]+' + patron = r'<figure class="(?:mh-carousel-thumb|mh-posts-grid-thumb)"> +<a (?:class="[^"]+" )?href="(?P<url>[^"]+)" title="(?P<title>.*?)(?: \((?P<year>\d+)\))? (?:(?P<lang>SUB ITA|ITA))(?: (?P<title2>[Mm][Oo][Vv][Ii][Ee]))?[^"]*"><img (?:class="[^"]+"|width="[^"]+" height="[^"]+") src="(?P<thumb>[^"]+)"[^>]+' def itemHook(item): if 'movie' in item.title.lower(): item.title = support.re.sub(' - [Mm][Oo][Vv][Ii][Ee]|[Mm][Oo][Vv][Ii][Ee]','',item.title) diff --git a/channels/casacinema.py b/channels/casacinema.py index bf27fed8..df59d713 100644 --- a/channels/casacinema.py +++ b/channels/casacinema.py @@ -105,9 +105,9 @@ def peliculas(item): action = 'select' if item.args == 'newest': - patron = r'<li><a href="(?P<url>[^"]+)"[^=]+="(?P<thumb>[^"]+)"><div> <div[^>]+>(?P<title>[^\(\[<]+)(?:\[(?P<quality1>HD)\])?[ ]?(?:\(|\[)?(?P<lang>Sub-ITA)?(?:\)|\])?[ ]?(?:\[(?P<quality>.+?)\])?[ ]?(?:\((?P<year>\d+)\))?<(?:[^>]+>.+?(?:title="Nuovi episodi">(?P<episode>\d+x\d+)[ ]?(?P<lang2>Sub-Ita)?|title="IMDb">(?P<rating>[^<]+)))?' + patron = r'<li><a href="(?P<url>[^"]+)"[^=]+="(?P<thumb>[^"]+)"><div>\s+<div[^>]+>(?P<title>[^\(\[<]+)(?:\[(?P<quality1>HD)\])?[ ]?(?:\(|\[)?(?P<lang>Sub-ITA)?(?:\)|\])?[ ]?(?:\[(?P<quality>.+?)\])?[ ]?(?:\((?P<year>\d+)\))?<(?:[^>]+>.+?(?:title="Nuovi episodi">(?P<episode>\d+x\d+)[ ]?(?P<lang2>Sub-Ita)?|title="IMDb">(?P<rating>[^<]+)))?' else: - patron = r'<li><a href="(?P<url>[^"]+)"[^=]+="(?P<thumb>[^"]+)"><div> <div[^>]+>(?P<title>[^\(\[<]+)(?:\[(?P<quality1>HD)\])?[ ]?(?:\(|\[)?(?P<lang>Sub-ITA)?(?:\)|\])?[ ]?(?:\[(?P<quality>.+?)\])?[ ]?(?:\((?P<year>\d+)\))?' + patron = r'<li><a href="(?P<url>[^"]+)"[^=]+="(?P<thumb>[^"]+)"><div>\s+<div[^>]+>(?P<title>[^\(\[<]+)(?:\[(?P<quality1>HD)\])?[ ]?(?:\(|\[)?(?P<lang>Sub-ITA)?(?:\)|\])?[ ]?(?:\[(?P<quality>.+?)\])?[ ]?(?:\((?P<year>\d+)\))?' patronNext = r'<a href="([^"]+)" >Pagina' # debug = True diff --git a/channels/cb01anime.py b/channels/cb01anime.py index deccdc5d..10e0dc1a 100644 --- a/channels/cb01anime.py +++ b/channels/cb01anime.py @@ -64,7 +64,7 @@ def peliculas(item): if item.args == 'newest': patron = r'<div id="blockvids">\s*<ul>\s*<li>\s*<a href="(?P<url>[^"]+)"[^>]+><img[^>]+src="(?P<thumb>[^"]+)"[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<title>[^\[]+)\[(?P<lang>[^\]]+)\]' else: - patron = r'<div class="span4">\s*<a href="(?P<url>[^"]+)"><img src="(?P<thumb>[^"]+)"[^>]+><\/a>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> <h1>(?P<title>[^<\[]+)(?:\[(?P<lang>[^\]]+)\])?</h1></a>.*?-->(?:.*?<br />)?\s*(?P<plot>[^<]+)' + patron = r'<div class="span4">\s*<a href="(?P<url>[^"]+)"><img src="(?P<thumb>[^"]+)"[^>]+><\/a>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> +<h1>(?P<title>[^<\[]+)(?:\[(?P<lang>[^\]]+)\])?</h1></a>.*?-->(?:.*?<br />)?\s*(?P<plot>[^<]+)' patronNext = r'<link rel="next" href="([^"]+)"' action = 'check' return locals() diff --git a/channels/dreamsub.py b/channels/dreamsub.py index ef6cf422..a2ab1fd4 100644 --- a/channels/dreamsub.py +++ b/channels/dreamsub.py @@ -89,7 +89,7 @@ def peliculas(item): patronBlock = r'<div id="%s"[^>]+>(?P<block>.*?)<div class="vistaDettagliata"' % item.args[1] patron = r'<li>\s*<a href="(?P<url>[^"]+)" title="(?P<title>[^"]+)" class="thumb">[^>]+>[^>]+>[^>]+>\s*[EePp]+\s*(?P<episode>\d+)[^>]+>\s<img src="(?P<thumb>[^"]+)"' else: - patron = r'<div class="showStreaming"> <b>(?P<title>[^<]+)[^>]+>[^>]+>\s*<span>Lingua:\s*(?P<lang>[^>]+)?>[<>br\s]+a href="(?P<url>[^"]+)"[^>]+>.*?--image-url:url\(/*(?P<thumb>[^\)]+).*?Anno di inizio</b>:\s*(?P<year>[0-9]{4})' + patron = r'<div class="showStreaming"> +<b>(?P<title>[^<]+)[^>]+>[^>]+>\s*<span>Lingua:\s*(?P<lang>[^>]+)?>[<>br\s]+a href="(?P<url>[^"]+)"[^>]+>.*?--image-url:url\(/*(?P<thumb>[^\)]+).*?Anno di inizio</b>:\s*(?P<year>[0-9]{4})' patronNext = '<li class="currentPage">[^>]+><li[^<]+<a href="([^"]+)">' return locals() diff --git a/channels/fastsubita.py b/channels/fastsubita.py index 088c3a43..c3cfe75f 100644 --- a/channels/fastsubita.py +++ b/channels/fastsubita.py @@ -25,14 +25,12 @@ host = config.get_channel_url() headers = [['Referer', host]] - - @support.menu def mainlist(item): Tvshow = [ - ('Aggiornamenti', ['', 'peliculas', '', 'update']), - ('Cerca... {bold}{TV}', ['','search']) + ('Aggiornamenti', ['', 'peliculas', 'update']), + ('Cerca... {bold}{TV}', ['', 'search']) ] # search = '' @@ -46,6 +44,16 @@ def peliculas(item): # support.dbg() deflang = 'Sub-ITA' + # è una singola pagina con tutti gli episodi + if item.grouped and not support.scrapertools.find_single_match(item.url, '-[0-9]+x[0-9]+-'): + item.grouped = False + return episodios_args(item) + + # ogni puntata è un articolo a se + if item.fulltitle: + item.url = host + '?s=' + item.fulltitle + actLike = 'episodios' + action = 'findvideos' blacklist = [''] if item.args == 'genres': @@ -53,32 +61,45 @@ def peliculas(item): patron = r'[^>]+>[^>]+>.+?href="(?P<url>[^"]+)[^>]>(?P<title>[^<]+)\s<' action = 'episodios' elif item.args == 'search': - patronBlock = r'</h1> </header>(?P<block>.*?)</main>' - patronMenu = r'(?:<img src="(?P<thumb>[^"]+)"[^>]+>)?[^>]+>[^>]+>[^>]+>[^>]+>[^>]+><a href="(?P<url>[^"]+)"[^>]+>(?:(?P<title>.+?)[ ](?P<episode>[\d&#;\d]+\d+|\d+..\d+)(?: \([a-zA-Z\s]+\) )(?:s\d+e\d+)?[ ]?(?:[&#\d;|.{3}]+)(?P<title2>[^&#\d;|^.{3}]+)(?:|.+?))<' + group = True + patronBlock = r'</header>(?P<block>.*?)</main>' + patron = '(?:<img[^>]+src="(?P<thumb>[^"]+)".*?)?<a href="(?P<url>[^"]+)"[^>]+>(?P<title>[^<]+?)(?:(?P<episode>\d+×\d+|\d+×\d+)|\[[sS](?P<season>[0-9]+)[^]]+\])\s?(?:(?P<lang>\([a-zA-Z\s]+\)) (?:[Ss]\d+[Ee]\d+)?\s?(?:[&#\d;|.{3}]+)(?P<title2>[^”[<]+)(?:&#\d)?)?' else: - patron = r'<div class="featured-thumb"> <a href="(?P<url>[^"]+)" title="(?:(?P<title>.+?)[ ]?(?P<episode>\d+×\d+).+?“(?P<title2>.+?)”).+?">' + # è una singola pagina con tutti gli episodi + if item.args != 'update' and not support.scrapertools.find_single_match(item.url, '-[0-9]+x[0-9]+-'): + return episodios_args(item) + patron = r'<div class="featured-thumb"> +<a href="(?P<url>[^"]+)" title="(?P<title>[^[]+)\[(?P<episode>\d+×\d+)?' patronBlock = r'<main id="main"[^>]+>(?P<block>.*?)<div id="secondary' + # def itemlistHook(itemlist): + # from core import scraper + # return scraper.sort_episode_list(itemlist) + patronNext = '<a class="next page-numbers" href="(.*?)">Successivi' - #debug = True + # debug = True + return locals() + + +def episodios_args(item): + actLike = 'episodios' + # support.dbg() + + deflang = 'Sub-ITA' + action = 'findvideos' + patron = '(?P<episode>\d+×\d+|\d+[×.]+\d+)(?:\s?\((?P<lang>[a-zA-Z ]+)\))?(?:\s[Ss]\d+[Ee]+\d+)? +(?:“|“)(?P<title2>.*?)(?:”|”).*?(?P<other>.*?)(?:/>|<p)' + patronBlock = r'<main id="main" class="site-main" role="main">(?P<block>.*?)</main>' + patronNext = '<a class="next page-numbers" href="(.*?)">Successivi' + + # debug = True return locals() @support.scrape def episodios(item): support.info(item) - #support.dbg() + return episodios_args(item) - deflang = 'Sub-ITA' - action = 'findvideos' - blacklist = [''] - patron = r'<div class="featured-thumb"> <a href="(?P<url>[^"]+)" title="(?:(?P<title>.+?)[ ]?(?P<episode>\d+×\d+|\d+[×.]+\d+).+?“(?P<title2>.+?)”).+?">' - patronBlock = r'<main id="main" class="site-main" role="main">(?P<block>.*?)</main>' - patronNext = '<a class="next page-numbers" href="(.*?)">Successivi' - - #debug = True - return locals() @support.scrape def genres(item): @@ -140,38 +161,54 @@ def newest(categoria): def findvideos(item): support.info('findvideos ->', item) - itemlist = [] - patronBlock = '<div class="entry-content">(?P<block>.*)<footer class="entry-footer">' patron = r'<a href="([^"]+)">' - html = support.match(item, patron=patron, patronBlock=patronBlock, headers=headers) - matches = html.matches - data= html.data - if item.args != 'episodios': - item.infoLabels['mediatype'] = 'episode' - for scrapedurl in matches: - if 'is.gd' in scrapedurl: - resp = httptools.downloadpage(scrapedurl, follow_redirects=False) - data += resp.headers.get("location", "") + '\n' + itemlist = [] + if item.other.startswith('http'): + resp = httptools.downloadpage(item.url, follow_redirects=False) + data = resp.headers.get("location", "") + '\n' + elif item.other: + html = support.match(item.other, patron=patron, headers=headers) + matches = html.matches + data = html.data + for scrapedurl in matches: + if 'is.gd' in scrapedurl: + resp = httptools.downloadpage(scrapedurl, follow_redirects=False) + data += resp.headers.get("location", "") + '\n' + elif not support.scrapertools.find_single_match(item.url, '-[0-9]+x[0-9]+-'): + return episodios(item) + else: + patronBlock = '<div class="entry-content">(?P<block>.*)<footer class="entry-footer">' + html = support.match(item, patron=patron, patronBlock=patronBlock, headers=headers) + matches = html.matches + data= html.data + + if item.args != 'episodios': + item.infoLabels['mediatype'] = 'episode' + for scrapedurl in matches: + if 'is.gd' in scrapedurl: + resp = httptools.downloadpage(scrapedurl, follow_redirects=False) + data += resp.headers.get("location", "") + '\n' itemlist += support.server(item, data) - data = support.match(item.url).data - patron = r'>Posted in <a href="https?://fastsubita.com/serietv/([^/]+)/(?:[^"]+)?"' - series = scrapertools.find_single_match(data, patron) - titles = support.typo(series.upper().replace('-', ' '), 'bold color kod') - goseries = support.typo("Vai alla Serie:", ' bold color kod') - itemlist.append( - item.clone(channel=item.channel, - title=goseries + titles, - fulltitle=titles, - show=series, - contentType='tvshow', - contentSerieName=series, - url=host+"/serietv/"+series, - action='episodios', - contentTitle=titles, - plot = "Vai alla Serie " + titles + " con tutte le puntate", - )) + # data = support.match(item.url).data + # patron = r'>Posted in <a href="https?://fastsubita.com/serietv/([^/]+)/(?:[^"]+)?"' + # series = scrapertools.find_single_match(data, patron) + # titles = support.typo(series.upper().replace('-', ' '), 'bold color kod') + # goseries = support.typo("Vai alla Serie:", ' bold color kod') + # itemlist.append( + # item.clone(channel=item.channel, + # # title=goseries + titles, + # title=titles, + # fulltitle=titles, + # show=series, + # contentType='tvshow', + # contentSerieName=series, + # url=host+"/serietv/"+series, + # action='episodios', + # contentTitle=titles, + # plot = "Vai alla Serie " + titles + " con tutte le puntate", + # )) return itemlist diff --git a/core/support.py b/core/support.py index d90f00d9..59b58c52 100755 --- a/core/support.py +++ b/core/support.py @@ -143,12 +143,16 @@ def scrapeLang(scraped, lang, longtitle): if language: longtitle += typo(language, '_ [] color kod') return language, longtitle + def cleantitle(title): - if type(title) != str: title.decode('UTF-8') - title = scrapertools.decodeHtmlentities(title) - cleantitle = title.replace('"', "'").replace('×', 'x').replace('–', '-').strip() + cleantitle = '' + if title: + if type(title) != str: title.decode('UTF-8') + title = scrapertools.decodeHtmlentities(title) + cleantitle = title.replace('"', "'").replace('×', 'x').replace('–', '-').strip() return cleantitle + def unifyEp(ep): # ep = re.sub(r'\s-\s|-|–|×|×', 'x', scraped['episode']) ep = ep.replace('-', 'x') @@ -157,7 +161,8 @@ def unifyEp(ep): ep = ep.replace('×', 'x') return ep -def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle): + +def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group): itemlist = [] if debug: regexDbg(item, patron, headers, block) @@ -184,6 +189,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t # AVVERTENZE: Se il titolo è trovato nella ricerca TMDB/TVDB/Altro allora le locandine e altre info non saranno quelle recuperate nel sito.!!!! stagione = '' # per quei siti che hanno la stagione nel blocco ma non nelle puntate + contents = [] + for i, match in enumerate(matches): if pagination and (pag - 1) * pagination > i and not search: continue # pagination if pagination and i >= pag * pagination and not search: break # pagination @@ -207,45 +214,52 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t val = domain + val scraped[kk] = val.strip() if type(val) == str else val - if scraped['season']: - stagione = scraped['season'] - ep = unifyEp(scraped['episode']) - if 'x' in ep: - episode = ep.split('x')[0].strip() - second_episode = ep.split('x')[1].strip() + episode = '' + if not group or item.grouped: + if scraped['season'] and scraped['episode']: + stagione = scraped['season'] + ep = unifyEp(scraped['episode']) + if 'x' in ep: + episode = ep.split('x')[0].strip() + second_episode = ep.split('x')[1].strip() + else: + episode = ep + second_episode = '' + item.infoLabels['season'] = int(scraped['season']) + item.infoLabels['episode'] = int(episode) + episode = str(int(scraped['season'])) +'x'+ str(int(episode)).zfill(2) + ('x' + str(int(second_episode)).zfill(2) if second_episode else '') + elif item.season: + item.infoLabels['season'] = int(item.season) + item.infoLabels['episode'] = int(scrapertools.find_single_match(scraped['episode'], r'(\d+)')) + episode = item.season +'x'+ scraped['episode'] + elif item.contentType == 'tvshow' and (scraped['episode'] == '' and scraped['season'] == '' and stagione == ''): + item.news = 'season_completed' + episode = '' else: - episode = ep - second_episode = '' - item.infoLabels['season'] = int(scraped['season']) - item.infoLabels['episode'] = int(episode) - episode = str(int(scraped['season'])) +'x'+ str(int(episode)).zfill(2) + ('x' + str(int(second_episode)).zfill(2) if second_episode else '') - elif item.season: - item.infoLabels['season'] = int(item.season) - item.infoLabels['episode'] = int(scrapertools.find_single_match(scraped['episode'], r'(\d+)')) - episode = item.season +'x'+ scraped['episode'] - elif item.contentType == 'tvshow' and (scraped['episode'] == '' and scraped['season'] == '' and stagione == ''): - item.news = 'season_completed' - episode = '' - else: - episode = unifyEp(scraped['episode']) if scraped['episode'] else '' - try: - if 'x' in episode: - ep = episode.split('x') - episode = str(int(ep[0])).zfill(1) + 'x' + str(int(ep[1])).zfill(2) - item.infoLabels['season'] = int(ep[0]) - item.infoLabels['episode'] = int(ep[1]) - second_episode = scrapertools.find_single_match(episode, r'x\d+x(\d+)') - if second_episode: episode = re.sub(r'(\d+x\d+)x\d+',r'\1-', episode) + second_episode.zfill(2) - except: - logger.debug('invalid episode: ' + episode) - pass + episode = unifyEp(scraped['episode']) if scraped['episode'] else '' + try: + if 'x' in episode: + ep = episode.split('x') + episode = str(int(ep[0])).zfill(1) + 'x' + str(int(ep[1])).zfill(2) + item.infoLabels['season'] = int(ep[0]) + item.infoLabels['episode'] = int(ep[1]) + second_episode = scrapertools.find_single_match(episode, r'x\d+x(\d+)') + if second_episode: episode = re.sub(r'(\d+x\d+)x\d+',r'\1-', episode) + second_episode.zfill(2) + except: + logger.debug('invalid episode: ' + episode) + pass #episode = re.sub(r'\s-\s|-|x|–|×', 'x', scraped['episode']) if scraped['episode'] else '' - title = cleantitle(scraped['title']) if scraped['title'] else '' - title2 = cleantitle(scraped['title2']) if scraped['title2'] else '' - quality = scraped['quality'].strip() if scraped['quality'] else '' - Type = scraped['type'] if scraped['type'] else '' - plot = cleantitle(scraped["plot"]) if scraped["plot"] else '' + title = cleantitle(scraped.get('title', '')) + if group and scraped.get('title', '') in contents and not item.grouped: # same title and grouping enabled + continue + if item.grouped and scraped.get('title', '') != item.fulltitle: # inside a group different tvshow should not be included + continue + contents.append(title) + title2 = cleantitle(scraped.get('title2', '')) if not group or item.grouped else '' + quality = scraped.get('quality', '') + # Type = scraped['type'] if scraped['type'] else '' + plot = cleantitle(scraped.get("plot", '')) # if title is set, probably this is a list of episodes or video sources # necessaria l'aggiunta di == scraped["title"] altrimenti non prende i gruppi dopo le categorie @@ -358,8 +372,11 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t contentLanguage = lang1, contentEpisodeNumber=episode if episode else '', news= item.news if item.news else '', - other = scraped['other'] if scraped['other'] else '' + other = scraped['other'] if scraped['other'] else '', + grouped=group ) + if scraped['episode'] and group and not item.grouped: # some adjustment for grouping feature + it.action = function # for lg in list(set(listGroups).difference(known_keys)): # it.__setattr__(lg, match[listGroups.index(lg)]) @@ -373,32 +390,12 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t return itemlist, matches -def scrape(func): - # args is a dict containing the foolowing keys: - # patron: the patron to use for scraping page, all capturing group must match with listGroups - # listGroups: a list containing the scraping info obtained by your patron, in order - # accepted values are: url, title, thumb, quality, year, plot, duration, genre, rating, episode, lang +def html_uniform(data): + return re.sub("='([^']+)'", '="\\1"', data.replace('\n', ' ').replace('\t', ' ').replace(' ', ' ')) - # headers: values to pass to request header - # blacklist: titles that you want to exclude(service articles for example) - # data: if you want to pass data manually, maybe because you need some custom replacement - # patronBlock: patron to get parts of the page (to scrape with patron attribute), - # if you need a "block inside another block" you can create a list, please note that all matches - # will be packed as string - # patronNext: patron for scraping next page link - # action: if you want results perform an action different from "findvideos", useful when scraping film by genres - # addVideolibrary: if "add to videolibrary" should appear - # example usage: - # import support - # itemlist = [] - # patron = 'blablabla' - # headers = [['Referer', host]] - # blacklist = 'Request a TV serie!' - # return support.scrape(item, itemlist, patron, ['thumb', 'quality', 'url', 'title', 'year', 'plot', 'episode', 'lang'], - # headers=headers, blacklist=blacklist) - # 'type' is a check for typologies of content e.g. Film or TV Series - # 'episode' is a key to grab episode numbers if it is separated from the title - # IMPORTANT 'type' is a special key, to work need typeContentDict={} and typeActionDict={} + +def scrape(func): + """https://github.com/kodiondemand/addon/wiki/decoratori#scrape""" def wrapper(*args): itemlist = [] @@ -409,30 +406,31 @@ def scrape(func): item = args['item'] - action = args['action'] if 'action' in args else 'findvideos' - anime = args['anime'] if 'anime' in args else '' - addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True - search = args['search'] if 'search' in args else '' - blacklist = args['blacklist'] if 'blacklist' in args else [] - data = args['data'] if 'data' in args else '' - patron = args['patron'] if 'patron' in args else args['patronMenu'] if 'patronMenu' in args else '' + action = args.get('action', 'findvideos') + anime = args.get('anime', '') + addVideolibrary = args.get('addVideolibrary', True) + search = args.get('search', '') + blacklist = args.get('blacklist', []) + data = args.get('data', '') + patron = args.get('patron', args.get('patronMenu', '')) if 'headers' in args: headers = args['headers'] elif 'headers' in func.__globals__: headers = func.__globals__['headers'] else: headers = '' - patronNext = args['patronNext'] if 'patronNext' in args else '' - patronBlock = args['patronBlock'] if 'patronBlock' in args else '' - typeActionDict = args['typeActionDict'] if 'typeActionDict' in args else {} - typeContentDict = args['typeContentDict'] if 'typeContentDict' in args else {} - debug = args['debug'] if 'debug' in args else False - debugBlock = args['debugBlock'] if 'debugBlock' in args else False - disabletmdb = args['disabletmdb'] if 'disabletmdb' in args else False + patronNext = args.get('patronNext', '') + patronBlock = args.get('patronBlock', '') + typeActionDict = args.get('typeActionDict', {}) + typeContentDict = args.get('typeContentDict', {}) + debug = args.get('debug', False) + debugBlock = args.get('debugBlock', False) + disabletmdb = args.get('disabletmdb', False) if 'pagination' in args and inspect.stack()[1][3] not in ['add_tvshow', 'get_episodes', 'update', 'find_episodes']: pagination = args['pagination'] if args['pagination'] else 20 else: pagination = '' - lang = args['deflang'] if 'deflang' in args else '' + lang = args.get('deflang', '') sceneTitle = args.get('sceneTitle') + group = args.get('group', False) pag = item.page if item.page else 1 # pagination matches = [] @@ -440,24 +438,19 @@ def scrape(func): logger.debug('PATRON= ', patron) if not data: page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True) - data = re.sub("='([^']+)'", '="\\1"', page.data) - data = data.replace('\n', ' ') - data = data.replace('\t', ' ') - data = data.replace(' ', ' ') - data = re.sub(r'>\s{2,}<', '> <', data) # replace all ' with " and eliminate newline, so we don't need to worry about + data = html_uniform(page.data) scrapingTime = time() if patronBlock: if debugBlock: regexDbg(item, patronBlock, headers, data) blocks = scrapertools.find_multiple_matches_groups(data, patronBlock) - block = "" for bl in blocks: # info(len(blocks),bl) if 'season' in bl and bl['season']: item.season = bl['season'] blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug, - typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle) + typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group) for it in blockItemlist: if 'lang' in bl: it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title) @@ -468,7 +461,7 @@ def scrape(func): matches.extend(blockMatches) elif patron: itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict, - typeActionDict, blacklist, search, pag, function, lang, sceneTitle) + typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group) if 'itemlistHook' in args: itemlist = args['itemlistHook'](itemlist) @@ -488,10 +481,28 @@ def scrape(func): else: break + if group and item.grouped or args.get('groupExplode'): + import copy + nextArgs = copy.copy(args) + @scrape + def newFunc(): + return nextArgs + nextArgs['item'] = nextPage(itemlist, item, data, patronNext, function) + nextArgs['group'] = False + if nextArgs['item']: + nextArgs['groupExplode'] = True + itemlist.pop() # remove next page just added + itemlist.extend(newFunc()) + else: + nextArgs['groupExplode'] = False + nextArgs['item'] = item + itemlist = newFunc() + itemlist = [i for i in itemlist if i.action not in ['add_pelicula_to_library', 'add_serie_to_library']] + if action != 'play' and function != 'episodios' and 'patronMenu' not in args and item.contentType in ['movie', 'tvshow', 'episode', 'undefined'] and not disabletmdb: tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True) - if (pagination and len(matches) <= pag * pagination) or not pagination: # next page with pagination + if not group and not args.get('groupExplode') and ((pagination and len(matches) <= pag * pagination) or not pagination): # next page with pagination if patronNext and inspect.stack()[1][3] not in ['newest']: nextPage(itemlist, item, data, patronNext, function) @@ -708,11 +719,13 @@ def menuItem(itemlist, filename, title='', action='', url='', contentType='undef url = url, extra = extra, args = args, - contentType = contentType + contentType = contentType, )) def menu(func): + """https://github.com/kodiondemand/addon/wiki/decoratori#menu""" + def wrapper(*args): args = func(*args) @@ -732,7 +745,7 @@ def menu(func): itemlist = [] for name in listUrls: - dictUrl[name] = args[name] if name in args else None + dictUrl[name] = args.get(name, None) logger.debug(dictUrl[name]) if name == 'film': title = 'Film' if name == 'tvshow': title = 'Serie TV' @@ -782,7 +795,7 @@ def menu(func): if name not in listUrls and name != 'item': listUrls_extra.append(name) for name in listUrls_extra: - dictUrl[name] = args[name] if name in args else None + dictUrl[name] = args.get(name, None) for sub, var in dictUrl[name]: menuItem(itemlist, filename, title = sub + ' ', @@ -924,12 +937,7 @@ def match(item_url_string, **args): data = httptools.downloadpage(url, **args).data # format page data - data = re.sub("='([^']+)'", '="\\1"', data) - data = data.replace('\n', ' ') - data = data.replace('\t', ' ') - data = data.replace(' ', ' ') - data = re.sub(r'>\s+<', '><', data) - data = re.sub(r'([a-zA-Z])"([a-zA-Z])', "\1'\2", data) + data = html_uniform(data) # collect blocks of a page if patronBlock: @@ -1094,7 +1102,7 @@ def videolibrary(itemlist, item, typography='', function_level=1, function=''): or (function == 'episodios' and contentType != 'movie'): if config.get_videolibrary_support() and len(itemlist) > 0: itemlist.append( - Item(channel=item.channel, + item.clone(channel=item.channel, title=title, fulltitle=item.fulltitle, show=item.fulltitle, @@ -1111,6 +1119,7 @@ def videolibrary(itemlist, item, typography='', function_level=1, function=''): return itemlist + def nextPage(itemlist, item, data='', patron='', function_or_level=1, next_page='', resub=[]): # Function_level is useful if the function is called by another function. # If the call is direct, leave it blank @@ -1126,7 +1135,7 @@ def nextPage(itemlist, item, data='', patron='', function_or_level=1, next_page= next_page = next_page.replace('&', '&') info('NEXT= ', next_page) itemlist.append( - Item(channel=item.channel, + item.clone(channel=item.channel, action = action, contentType=item.contentType, title=typo(config.get_localized_string(30992), 'color kod bold'), @@ -1134,8 +1143,7 @@ def nextPage(itemlist, item, data='', patron='', function_or_level=1, next_page= args=item.args, nextPage=True, thumbnail=thumb())) - - return itemlist + return itemlist[-1] def pagination(itemlist, item, page, perpage, function_level=1): if len(itemlist) >= page * perpage: diff --git a/servers/streamtape.py b/servers/streamtape.py index d4fcf845..442884b5 100644 --- a/servers/streamtape.py +++ b/servers/streamtape.py @@ -1,9 +1,7 @@ - # -*- coding: utf-8 -*- # -------------------------------------------------------- # Conector streamtape By Alfa development Group # -------------------------------------------------------- -import re from core import httptools from core import scrapertools from platformcode import logger @@ -13,7 +11,6 @@ PY3 = False if sys.version_info[0] >= 3: PY3 = True; unicode = str; unichr = chr; long = int - def test_video_exists(page_url): logger.info("(page_url='%s')" % page_url) global data diff --git a/service.py b/service.py index ad4182fe..36806865 100644 --- a/service.py +++ b/service.py @@ -329,6 +329,8 @@ class AddonMonitor(xbmc.Monitor): if settings_post: # backup settings filetools.copy(os.path.join(config.get_data_path(), "settings.xml"), os.path.join(config.get_data_path(), "settings.bak"), True) + logger.debug({k: self.settings_pre[k] for k in self.settings_pre + if k in settings_post and self.settings_pre[k] != settings_post[k]}) from platformcode import xbmc_videolibrary if self.settings_pre.get('downloadpath', None) != settings_post.get('downloadpath', None):