From a7effd0d7d8568a1125c2dd7ccf02b89814d14b5 Mon Sep 17 00:00:00 2001
From: 4l3x87 <50104109+4l3x87@users.noreply.github.com>
Date: Fri, 31 May 2019 20:56:36 +0200
Subject: [PATCH] Improvements to the Guardaserie.click and Fastsubita
 channels and to core support (#41)

---
 channels/fastsubita.py       | 195 ++++++++++-------------------------
 channels/guardaserieclick.py | 164 +++++++++++------------------
 core/support.py              |   8 +-
 3 files changed, 117 insertions(+), 250 deletions(-)

diff --git a/channels/fastsubita.py b/channels/fastsubita.py
index c8f575f7..000c3dab 100644
--- a/channels/fastsubita.py
+++ b/channels/fastsubita.py
@@ -4,13 +4,10 @@
 # Canale per fastsubita
 # ------------------------------------------------------------
 
-import re
-
-import channelselector
 from core import scrapertools, httptools, tmdb, support
 from core.item import Item
+from core.support import log
 from platformcode import config, logger
-from specials import autoplay
 
 __channel__ = 'fastsubita'
 host = config.get_setting("channel_host", __channel__)
@@ -19,9 +16,6 @@
 list_language = IDIOMAS.values()
 list_servers = ['verystream', 'openload', 'speedvideo', 'wstream', 'flashx', 'vidoza', 'vidtome']
 list_quality = ['default']
-# checklinks = config.get_setting('checklinks', 'fastsubita')
-# checklinks_number = config.get_setting('checklinks_number', 'fastsubita')
-
 headers = [
     ['Host', 'fastsubita.com'],
     ['User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'],
@@ -39,32 +33,23 @@ PERPAGE = 15
 
 def mainlist(item):
-    logger.info(item.channel+" mainlist")
+    log()
     itemlist = []
-    support.menu(itemlist, 'Serie TV bold', 'lista_serie', host,'tvshow')
-    support.menu(itemlist, 'Novità submenu', 'pelicuals_tv', host,'tvshow')
-    support.menu(itemlist, 'Archivio A-Z submenu', 'list_az', host,'tvshow',args=['serie'])
-    support.menu(itemlist, 'Cerca', 'search', host,'tvshow')
-
-    autoplay.init(item.channel, list_servers, list_quality)
-    autoplay.show_option(item.channel, itemlist)
-
-    itemlist.append(
-        Item(channel='setting',
-             action="channel_config",
-             title=support.typo("Configurazione Canale color lime"),
-             config=item.channel,
-             folder=False,
-             thumbnail=channelselector.get_thumb('setting_0.png'))
-        )
+    support.menu(itemlist, 'Novità bold', 'pelicuals_tv', host, 'tvshow')
+    support.menu(itemlist, 'Serie TV bold', 'lista_serie', host, 'tvshow')
+    support.menu(itemlist, 'Archivio A-Z submenu', 'list_az', host, 'tvshow', args=['serie'])
+    support.menu(itemlist, 'Cerca', 'search', host, 'tvshow')
+    support.aplay(item, itemlist, list_servers, list_quality)
+    support.channel_config(item, itemlist)
 
     return itemlist
 
+
 # ----------------------------------------------------------------------------------------------------------------
 def cleantitle(scrapedtitle):
     scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle.strip())
-    scrapedtitle = scrapedtitle.replace('’', '\'').replace('×','x').replace('×','x')
+    scrapedtitle = scrapedtitle.replace('’', '\'').replace('×', 'x').replace('×', 'x').replace('"', "'")
 
     return scrapedtitle.strip()
 
@@ -73,7 +58,7 @@ def cleantitle(scrapedtitle):
 
 def newest(categoria):
-    logger.info(__channel__+" newest" + categoria)
+    log()
     itemlist = []
     item = Item()
     try:
@@ -96,15 +81,11 @@ def pelicuals_tv(item):
-    logger.info(item.channel+" pelicuals_tv")
+    log()
     itemlist = []
 
-    # Carica la pagina
-    data = httptools.downloadpage(item.url, headers=headers).data
-
-    # Estrae i contenuti
-    patron = r'<h2 class="entry-title"><a href="([^"]+)" rel="bookmark">(.*?)<'
-    matches = re.compile(patron, re.DOTALL).findall(data)
+    matches, data = support.match(item, r'<h2 class="entry-title"><a href="([^"]+)" rel="bookmark">(.*?)<',
+                                  headers=headers)
 
     for scrapedurl, scrapedtitle in matches:
         scrapedplot = ""
@@ -123,7 +104,7 @@ def pelicuals_tv(item):
         else:
             scrapedurl = "http:" + scrapedurl
 
-        title = scraped_1+" - "+infoLabels['season']+"x"+infoLabels['episode']+" Sub-ITA"
+        title = scraped_1 + " - " + infoLabels['season'] + "x" + infoLabels['episode'] + " Sub-ITA"
 
         itemlist.append(
             Item(channel=item.channel,
@@ -144,35 +125,20 @@ def pelicuals_tv(item):
     tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
 
     # Paginazione
-    support.nextPage(itemlist,item,data,'<a href="([^"]+)">([^<]+)<\/a>')
+    support.nextPage(itemlist, item, data, '<a href="([^"]+)">([^<]+)<\/a>')
 
 
 # ----------------------------------------------------------------------------------------------------------------
 
 def serietv():
     itemlist = []
-    data = httptools.downloadpage("%s/" % host, headers=headers).data
-    block = scrapertools.find_single_match(data, r'<select name="cat" id="cat" class="postform">(.*?)</select>')
-    # patron = r'<a href="([^"]+)">([^<]+)<\/a>'
-    # matches = re.compile(patron, re.DOTALL).findall(block)
-    matches = re.compile(r'<option class="level-(\d)" value="(\d+)">([^<]+)</option>', re.DOTALL).findall(block)
+    matches = support.match(Item(), r'<option class="level-(\d)" value="(\d+)">([^<]+)</option>',
+                            r'<select name="cat" id="cat" class="postform">(.*?)</select>', headers,
+                            url="%s/" % host)[0]
     index = 0
-    # for scrapedurl, scrapedtitle in matches:
-    #     scrapedtitle = cleantitle(scrapedtitle)
-    #     if "http:" not in scrapedurl:
-    #         scrapedurl = "http:" + scrapedurl
-    #
-    #     if ('S' in scrapedtitle.strip().upper()[0] and len(scrapedtitle.strip()) == 3) or '02' == scrapedtitle:
-    #         # itemlist[index -1][0]+='{|}'+scrapedurl
-    #         continue
-    #
-    #     itemlist.append([scrapedurl,scrapedtitle])
-    #     index += 1
+
     for level, cat, title in matches:
         title = cleantitle(title)
         url = '%s?cat=%s' % (host, cat)
@@ -183,12 +149,11 @@ def serietv():
         itemlist.append([url, title])
         index += 1
 
-
-    logger.debug(itemlist)
     return itemlist
 
+
 def lista_serie(item):
-    logger.info(item.channel+" lista_serie")
+    log()
     itemlist = []
 
     p = 1
     if '{}' in item.url:
         item.url, p = item.url.split('{}')
         p = int(p)
 
-    # logger.debug(p)
-    # Carica la pagina
-    # data = httptools.downloadpage(item.url, headers=headers).data
-    #
-    # block = scrapertools.find_single_match(data, r'<div class="entry-content">(.*?)</div>')
-    #
-    # # Estrae i contenuti
-    # # patron = r'<a href="([^"]+)">([^<]+)<\/a>'
-    # patron = r'<a href="([^"]+)" rel="bookmark">([^<]+)<\/a>'
-    # matches = re.compile(patron, re.DOTALL).findall(block)
     if '||' in item.url:
         series = item.url.split('\n\n')
         matches = []
@@ -235,76 +190,41 @@ def lista_serie(item):
                  contentType='episode',
                  originalUrl=scrapedurl,
                  folder=True))
-        # ii += 1
 
     tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
 
     if len(series) >= p * PERPAGE:
-        scrapedurl = item.url + '{}' + str(p + 1)
-        itemlist.append(
-            Item(channel=item.channel,
-                 action='lista_serie',
-                 contentType=item.contentType,
-                 title=support.typo(config.get_localized_string(30992), 'color kod bold'),
-                 url=scrapedurl,
-                 args=item.args,
-                 extra=item.extra,
-                 thumbnail=support.thumb()))
+        next_page = item.url + '{}' + str(p + 1)
+        support.nextPage(itemlist, item, next_page=next_page)
 
     return itemlist
 
+
 def findvideos(item):
-    logger.info(item.channel+" findvideos")
+    log()
     itemlist = []
-    data = httptools.downloadpage(item.url, headers=headers).data
-    bloque = scrapertools.find_single_match(data, '<div class="entry-content">(.*?)</div>')
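The fastsubita diff above converges the channel on the core.support helpers: support.match now wraps the downloadpage/re.compile boilerplate that each listing function used to open-code. A minimal sketch of the resulting call pattern follows; the patron, the example_list name, and the header value are purely illustrative and are not taken from this patch:

    from core import support
    from core.item import Item

    headers = [['User-Agent', 'Mozilla/5.0']]  # channel-level headers, as in fastsubita

    def example_list(item):
        itemlist = []
        # support.match downloads item.url (or the url= override), normalizes
        # whitespace, optionally narrows the page to patron_block, applies
        # patron, and returns the pair (matches, data).
        matches, data = support.match(item, r'<a href="([^"]+)">([^<]+)</a>',
                                      headers=headers)
        for scrapedurl, scrapedtitle in matches:
            itemlist.append(Item(channel=item.channel, action='findvideos',
                                 title=scrapedtitle, url=scrapedurl))
        return itemlist
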
  • ', r'(.*?)', headers)[0] - - for scrapedurl, scrapedtitle in matches: - itemlist.append( - Item(channel=item.channel, - action="lista_serie", - title=scrapedtitle, - contentType="tvshow", - url="".join([host, scrapedurl]), - thumbnail=item.thumbnail, - extra="tv", - folder=True)) - - return itemlist + log() + return support.scrape(item, r'
  • \s]+>([^<]+)
  • ', ['url', 'title'], patron_block=r'(.*?)', headers=headers, action="lista_serie") # ================================================================================================================ # ---------------------------------------------------------------------------------------------------------------- def lista_serie(item): - support.log(item.channel+" lista_serie") + log() itemlist = [] - # data = httptools.downloadpage(item.url, headers=headers).data - # - # patron = r'\s*[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>([^<]+)

    ' - # blocco = scrapertools.find_single_match(data, - # r'(.*?)') - # matches = re.compile(patron, re.DOTALL).findall(blocco) - - patron_block = r'(.*?)' - patron = r'\s*[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>([^<]+)

    ' + patron_block = r'(.*?)' + patron = r'\s[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>([^<]+)

    ' matches, data = support.match(item, patron, patron_block, headers) - for scrapedurl, scrapedimg, scrapedtitle in matches: scrapedtitle = cleantitle(scrapedtitle) - if scrapedtitle not in ['DMCA','Contatti','Lista di tutte le serie tv']: + if scrapedtitle not in ['DMCA', 'Contatti', 'Lista di tutte le serie tv']: itemlist.append( Item(channel=item.channel, action="episodios", @@ -254,7 +222,7 @@ def lista_serie(item): tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True) - support.nextPage(itemlist,item,data,r"\s*([^<]+)<\/div>[^>]+>[^>]+>[^>]+>[^>]+>([^<]+)?[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>]+>([^<]+)<[^>]+>[^>]+>[^>]+>' - patron += r'[^<]+[^"]+".*?serie="([^"]+)".*?stag="([0-9]*)".*?ep="([0-9]*)"\s*' - patron += r'.*?embed="([^"]+)"\s*.*?embed2="([^"]+)?"\s*.*?embed3="([^"]+)?"?[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>\s*' - patron += r'(?:]+>|]+>)?' - # matches = re.compile(patron, re.DOTALL).findall(data) - - # logger.debug(matches) + patron = r'\s([^<]+)<\/div>[^>]+>[^>]+>[^>]+>[^>]+>([^<]+)?[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>]+>([^<]+)<[^>]+>[^>]+>[^>]+>' + patron += r'[^<]+[^"]+".*?serie="([^"]+)".*?stag="([0-9]*)".*?ep="([0-9]*)"\s' + patron += r'.*?embed="([^"]+)"\s.*?embed2="([^"]+)?"\s.*?embed3="([^"]+)?"?[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>\s?' + patron += r'(?:]+>|]+>)?' matches = support.match(item, patron, headers=headers)[0] - - for scrapedtitle, scrapedepisodetitle, scrapedplot, scrapedserie, scrapedseason, scrapedepisode, scrapedurl, scrapedurl2,scrapedurl3,scrapedthumbnail,scrapedthumbnail2 in matches: + for scrapedtitle, scrapedepisodetitle, scrapedplot, scrapedserie, scrapedseason, scrapedepisode, scrapedurl, scrapedurl2, scrapedurl3, scrapedthumbnail, scrapedthumbnail2 in matches: scrapedtitle = cleantitle(scrapedtitle) scrapedepisode = scrapedepisode.zfill(2) scrapedepisodetitle = cleantitle(scrapedepisodetitle) title = str("%sx%s %s" % (scrapedseason, scrapedepisode, scrapedepisodetitle)).strip() if 'SUB-ITA' in scrapedtitle: - title +=" Sub-ITA" + title += " Sub-ITA" infoLabels = {} infoLabels['season'] = scrapedseason infoLabels['episode'] = scrapedepisode itemlist.append( - Item(channel=item.channel, - action="findvideos", - title=title, - fulltitle=scrapedtitle, - url=scrapedurl+"\r\n"+scrapedurl2+"\r\n"+scrapedurl3, - contentType="episode", - plot=scrapedplot, - contentSerieName=scrapedserie, - contentLanguage='Sub-ITA' if 'Sub-ITA' in title else '', - infoLabels=infoLabels, - thumbnail=scrapedthumbnail2 if scrapedthumbnail2 != '' else scrapedthumbnail, - folder=True)) + Item(channel=item.channel, + action="findvideos", + title=title, + fulltitle=scrapedtitle, + url=scrapedurl + "\r\n" + scrapedurl2 + "\r\n" + scrapedurl3, + contentType="episode", + plot=scrapedplot, + contentSerieName=scrapedserie, + contentLanguage='Sub-ITA' if 'Sub-ITA' in title else '', + infoLabels=infoLabels, + thumbnail=scrapedthumbnail2 if scrapedthumbnail2 != '' else scrapedthumbnail, + folder=True)) tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True) @@ -315,22 +277,18 @@ def episodios(item): # ---------------------------------------------------------------------------------------------------------------- def findepvideos(item): - support.log(item.channel+" findepvideos") + log() data = httptools.downloadpage(item.url, headers=headers).data matches = scrapertools.find_multiple_matches(data, item.extra) data = "\r\n".join(matches[0]) item.contentType = 'movie' - itemlist = support.server(item, data=data) - - return itemlist + return support.server(item, data=data) # 
 # ================================================================================================================
 # ----------------------------------------------------------------------------------------------------------------
 def findvideos(item):
-    support.log(item.channel+" findvideos")
+    log()
     logger.debug(item.url)
-    itemlist = support.server(item, data=item.url)
-
-    return itemlist
+    return support.server(item, data=item.url)

diff --git a/core/support.py b/core/support.py
index 94c93a52..c92b0c8b 100644
--- a/core/support.py
+++ b/core/support.py
@@ -452,7 +452,7 @@ def match(item, patron='', patron_block='', headers='', url=''):
     matches = []
     url = url if url else item.url
     data = httptools.downloadpage(url, headers=headers, ignore_response_code=True).data.replace("'", '"')
-    data = re.sub(r'\n|\t|\s\s', '', data)
+    data = re.sub(r'\n|\t|\s\s', ' ', data)
     log('DATA= ', data)
 
     if patron_block:
@@ -500,11 +500,11 @@ def videolibrary(itemlist, item, typography='', function_level=1):
     return itemlist
 
 
-def nextPage(itemlist, item, data, patron, function_level=1):
+def nextPage(itemlist, item, data='', patron='', function_level=1, next_page=''):
     # Function_level is useful if the function is called by another function.
     # If the call is direct, leave it blank
-
-    next_page = scrapertoolsV2.find_single_match(data, patron)
+    if next_page == '':
+        next_page = scrapertoolsV2.find_single_match(data, patron)
 
     if next_page != "":
         if 'http' not in next_page:
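
Two notes on the core/support.py hunks, each with a small sketch. First, match() now collapses newlines, tabs and double spaces to a single space instead of deleting them, so a patron that relied on whitespace vanishing entirely must now expect a literal space; this lines up with the guardaserieclick patrons in this patch trading \s* for \s. A quick check of that assumption (the sample string is illustrative):

    import re

    data = 'Episodio\n1'
    re.sub(r'\n|\t|\s\s', '', data)   # pre-patch normalization:  'Episodio1'
    re.sub(r'\n|\t|\s\s', ' ', data)  # post-patch normalization: 'Episodio 1'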
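
Second, nextPage() keeps its old scrape-the-link behaviour but gains a next_page keyword, so a channel that already knows the target URL can skip the regex. Both call styles, sketched with an illustrative patron (itemlist, item, data and p as in the channel code above):

    # old style: extract the next-page link from the page data
    support.nextPage(itemlist, item, data, '<a class="next" href="([^"]+)">')
    # new style: pass a precomputed target, as fastsubita's lista_serie now does
    support.nextPage(itemlist, item, next_page=item.url + '{}' + str(p + 1))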