migliorie a support

This commit is contained in:
mac12m99
2019-08-07 16:47:07 +02:00
parent b14673e4eb
commit 28474b0249
10 changed files with 230 additions and 282 deletions
+5 -5
View File
@@ -45,9 +45,9 @@ def peliculas(item):
action="findvideos" action="findvideos"
if item.args == "search": if item.args == "search":
patronBlock = r'</script> <div class="boxgrid caption">(.*?)<div id="right_bar">' patronBlock = r'</script> <div class="boxgrid caption">(?P<block>.*)<div id="right_bar">'
else: else:
patronBlock = r'<div class="cover_kapsul ml-mask">(.*?)<div class="page_nav">' patronBlock = r'<div class="cover_kapsul ml-mask">(?P<block>.*)<div class="page_nav">'
patron = r'<div class="cover boxcaption"> <h2>.<a href="(?P<url>[^"]+)">.*?<.*?src="(?P<thumb>[^"]+)"'\ patron = r'<div class="cover boxcaption"> <h2>.<a href="(?P<url>[^"]+)">.*?<.*?src="(?P<thumb>[^"]+)"'\
'.+?[^>]+>[^>]+<div class="trdublaj"> (?P<quality>[A-Z]+)<[^>]+>(?:.[^>]+>(?P<lang>.*?)<[^>]+>).*?'\ '.+?[^>]+>[^>]+<div class="trdublaj"> (?P<quality>[A-Z]+)<[^>]+>(?:.[^>]+>(?P<lang>.*?)<[^>]+>).*?'\
'<p class="h4">(?P<title>.*?)</p>[^>]+> [^>]+> [^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> [^>]+> '\ '<p class="h4">(?P<title>.*?)</p>[^>]+> [^>]+> [^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> [^>]+> '\
@@ -69,13 +69,13 @@ def categorie(item):
blacklist = 'altadefinizione01' blacklist = 'altadefinizione01'
if item.args == 'genres': if item.args == 'genres':
patronBlock = r'<ul class="kategori_list">(.*?)</ul>' patronBlock = r'<ul class="kategori_list">(?P<block>.*)</ul>'
patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>' patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
elif item.args == 'years': elif item.args == 'years':
patronBlock = r'<ul class="anno_list">(.*?)</ul>' patronBlock = r'<ul class="anno_list">(?P<block>.*)</ul>'
patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>' patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
elif item.args == 'orderalf': elif item.args == 'orderalf':
patronBlock = r'<div class="movies-letter">(.*)<div class="clearfix">' patronBlock = r'<div class="movies-letter">(?P<block>.*)<div class="clearfix">'
patron = '<a title=.*?href="(?P<url>[^"]+)"><span>(?P<title>.*?)</span>' patron = '<a title=.*?href="(?P<url>[^"]+)"><span>(?P<title>.*?)</span>'
return locals() return locals()
+4 -4
View File
@@ -41,9 +41,9 @@ def peliculas(item):
action="findvideos" action="findvideos"
if item.args == "search": if item.args == "search":
patronBlock = r'</script> <div class="boxgrid caption">(.*?)<div id="right_bar">' patronBlock = r'</script> <div class="boxgrid caption">(?P<block>.*)<div id="right_bar">'
else: else:
patronBlock = r'<div class="cover_kapsul ml-mask">(.*?)<div class="page_nav">' patronBlock = r'<div class="cover_kapsul ml-mask">(?P<block>.*)<div class="page_nav">'
patron = r'<div class="cover boxcaption"> <h2>.<a href="(?P<url>[^"]+)">.*?<.*?src="(?P<thumb>[^"]+)"'\ patron = r'<div class="cover boxcaption"> <h2>.<a href="(?P<url>[^"]+)">.*?<.*?src="(?P<thumb>[^"]+)"'\
'.+?[^>]+>[^>]+<div class="trdublaj"> (?P<quality>[A-Z]+)<[^>]+>(?:.[^>]+>(?P<lang>.*?)<[^>]+>).*?'\ '.+?[^>]+>[^>]+<div class="trdublaj"> (?P<quality>[A-Z]+)<[^>]+>(?:.[^>]+>(?P<lang>.*?)<[^>]+>).*?'\
'<p class="h4">(?P<title>.*?)</p>[^>]+> [^>]+> [^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> [^>]+> '\ '<p class="h4">(?P<title>.*?)</p>[^>]+> [^>]+> [^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> [^>]+> '\
@@ -65,10 +65,10 @@ def categorie(item):
blacklist = 'Altadefinizione01' blacklist = 'Altadefinizione01'
if item.args == 'genres': if item.args == 'genres':
patronBlock = r'<ul class="kategori_list">(.*?)</ul>' patronBlock = r'<ul class="kategori_list">(?P<block>.*)</ul>'
patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>' patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
elif item.args == 'years': elif item.args == 'years':
patronBlock = r'<ul class="anno_list">(.*?)</ul>' patronBlock = r'<ul class="anno_list">(?P<block>.*)</ul>'
patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>' patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
elif item.args == 'orderalf': elif item.args == 'orderalf':
patronBlock = r'<div class="movies-letter">(.*)<div class="clearfix">' patronBlock = r'<div class="movies-letter">(.*)<div class="clearfix">'
+4 -4
View File
@@ -60,13 +60,13 @@ def genres(item):
action = 'peliculas' action = 'peliculas'
if item.args == 'genres': if item.args == 'genres':
patronBlock = r'<ul class="listSubCat" id="Film">(.*?)</ul>' patronBlock = r'<ul class="listSubCat" id="Film">(?P<block>.*)</ul>'
elif item.args == 'years': elif item.args == 'years':
patronBlock = r'<ul class="listSubCat" id="Anno">(.*?)</ul>' patronBlock = r'<ul class="listSubCat" id="Anno">(?P<block>.*)</ul>'
elif item.args == 'quality': elif item.args == 'quality':
patronBlock = r'<ul class="listSubCat" id="Qualita">(.*?)</ul>' patronBlock = r'<ul class="listSubCat" id="Qualita">(?P<block>.*)</ul>'
elif item.args == 'lucky': # sono i titoli random nella pagina, cambiano 1 volta al dì elif item.args == 'lucky': # sono i titoli random nella pagina, cambiano 1 volta al dì
patronBlock = r'FILM RANDOM.*?class="listSubCat">(.*?)</ul>' patronBlock = r'FILM RANDOM.*?class="listSubCat">(?P<block>.*)</ul>'
action = 'findvideos' action = 'findvideos'
patron = r'<li><a href="(?P<url>[^"]+)">(?P<title>[^<]+)<' patron = r'<li><a href="(?P<url>[^"]+)">(?P<title>[^<]+)<'
+1 -1
View File
@@ -37,7 +37,7 @@ def menu(item):
action='peliculas' action='peliculas'
patron = r'<li><a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a></li>' patron = r'<li><a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a></li>'
patronBlock= r'<ul class="listSubCat" id="'+ str(item.args) + '">(.*?)</ul>' patronBlock= r'<ul class="listSubCat" id="'+ str(item.args) + '">(?P<block>.*)</ul>'
return locals() return locals()
+26 -96
View File
@@ -44,10 +44,9 @@ def mainlist(item):
('Anni', ['', 'menu', 'Film per Anno']) ('Anni', ['', 'menu', 'Film per Anno'])
] ]
tvshow = ['/serietv/', tvshow = ['/serietv/',
('Aggiornamenti serie tv', ['/serietv/aggiornamento-quotidiano-serie-tv/', 'last']),
('Per Lettera', ['/serietv/', 'menu', 'Serie-Tv per Lettera']), ('Per Lettera', ['/serietv/', 'menu', 'Serie-Tv per Lettera']),
('Per Genere', ['/serietv/aggiornamento-quotidiano-serie-tv/', 'menu', 'Serie-Tv per Genere']), ('Per Genere', ['/serietv/', 'menu', 'Serie-Tv per Genere']),
('Per anno', ['/serietv/aggiornamento-quotidiano-serie-tv/', 'menu', 'Serie-Tv per Anno']) ('Per anno', ['/serietv/', 'menu', 'Serie-Tv per Anno'])
] ]
return locals() return locals()
@@ -56,76 +55,39 @@ def mainlist(item):
@support.scrape @support.scrape
def menu(item): def menu(item):
findhost() findhost()
patronBlock = item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>' patronBlock = item.args + r'<span.*?><\/span>.*?<ul.*?>(?P<block>.*?)<\/ul>'
patronMenu = r'href="?(?P<url>[^">]+)"?>(?P<title>.*?)<\/a>' patronMenu = r'href="?(?P<url>[^">]+)"?>(?P<title>.*?)<\/a>'
action = 'peliculas' action = 'peliculas'
return locals() return locals()
@support.scrape
def newest(categoria): def newest(categoria):
findhost() findhost()
itemlist = [] debug = True
item = Item() item = Item()
item.contentType = 'movie' item.contentType = 'movie'
item.url = host + '/lista-film-ultimi-100-film-aggiunti/' item.url = host + '/lista-film-ultimi-100-film-aggiunti/'
return support.scrape(item, r'<a href=([^>]+)>([^<([]+)(?:\[([A-Z]+)\])?\s\(([0-9]{4})\)<\/a>', patron = "<a href=(?P<url>[^>]+)>(?P<title>[^<([]+)(?:\[(?P<quality>[A-Z]+)\])?\s\((?P<year>[0-9]{4})\)<\/a>"
['url', 'title', 'quality', 'year'], patronBlock = r'Ultimi 100 film aggiunti:.*?<\/td>'
patronBlock=r'Ultimi 100 film aggiunti:.*?<\/td>')
return locals()
def last(item): def search(item, text):
support.log() support.log(item.url, "search", text)
itemlist = []
infoLabels = {}
quality = ''
PERPAGE = 20
page = 1
if item.page:
page = item.page
if item.contentType == 'tvshow': try:
matches = support.match(item, r'<a href="([^">]+)".*?>([^(:(|[)]+)([^<]+)<\/a>', '<article class="sequex-post-content.*?</article>', headers)[0] item.url = item.url + "/?s=" + text.replace(' ', '+')
else: return peliculas(item)
matches = support.match(item, r'<a href=([^>]+)>([^(:(|[)]+)([^<]+)<\/a>', r'<strong>Ultimi 100 film Aggiornati:<\/a><\/strong>(.*?)<td>', headers)[0]
for i, (url, title, info) in enumerate(matches): # Continua la ricerca in caso di errore
if (page - 1) * PERPAGE > i: continue except:
if i >= page * PERPAGE: break import sys
add = True for line in sys.exc_info():
title = title.rstrip() logger.error("%s" % line)
if item.contentType == 'tvshow': return []
for i in itemlist:
if i.url == url: # togliamo i doppi
add = False
else:
infoLabels['year'] = scrapertoolsV2.find_single_match(info, r'\(([0-9]+)\)')
quality = scrapertoolsV2.find_single_match(info, r'\[([A-Z]+)\]')
if quality:
longtitle = title + support.typo(quality,'_ [] color kod')
else:
longtitle = title
if add:
itemlist.append(
Item(channel=item.channel,
action='findvideos' if item.contentType == 'movie' else 'episodios',
contentType=item.contentType,
title=longtitle,
fulltitle=title,
show=title,
quality=quality,
url=url,
infoLabels=infoLabels
)
)
support.pagination(itemlist, item, page, PERPAGE)
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
return itemlist
@support.scrape @support.scrape
@@ -137,51 +99,19 @@ def peliculas(item):
patron = r'div class="card-image">.*?<img src="(?P<thumb>[^ ]+)" alt.*?<a href="(?P<url>[^ >]+)">(?P<title>[^<[(]+)<\/a>.*?<strong><span style="[^"]+">(?P<genre>[^<>0-9(]+)\((?P<year>[0-9]{4}).*?</(?:p|div)>(?P<plot>.*?)</div' patron = r'div class="card-image">.*?<img src="(?P<thumb>[^ ]+)" alt.*?<a href="(?P<url>[^ >]+)">(?P<title>[^<[(]+)<\/a>.*?<strong><span style="[^"]+">(?P<genre>[^<>0-9(]+)\((?P<year>[0-9]{4}).*?</(?:p|div)>(?P<plot>.*?)</div'
action = 'episodios' action = 'episodios'
patronBlock=[r'<div class="?sequex-page-left"?>(.*?)<aside class="?sequex-page-right"?>', # patronBlock=[r'<div class="?sequex-page-left"?>(?P<block>.*?)<aside class="?sequex-page-right"?>',
'<div class="?card-image"?>.*?(?=<div class="?card-image"?>|<div class="?rating"?>)'] # '<div class="?card-image"?>.*?(?=<div class="?card-image"?>|<div class="?rating"?>)']
patronNext='<a class="?page-link"? href="?([^>]+)"?><i class="fa fa-angle-right">' patronNext='<a class="?page-link"? href="?([^>]+)"?><i class="fa fa-angle-right">'
return locals() return locals()
@support.scrape
def episodios(item): def episodios(item):
itemlist = [] patronBlock = r'(?P<block><div class="sp-head[a-z ]*?" title="Espandi">\s*STAGIONE [0-9]+ - (?P<lang>[^\s]+)(?: - (?P<quality>[^-<]+))?.*?[^<>]*?</div>.*?)<div class="spdiv">\[riduci\]</div>'
patron = '(?:<p>)(?P<episode>[0-9]+(?:&#215;|×)[0-9]+)(?P<url>.*?)(?:</p>|<br)'
data = httptools.downloadpage(item.url).data return locals()
matches = scrapertoolsV2.find_multiple_matches(data,
r'(<div class="sp-head[a-z ]*?" title="Espandi">[^<>]*?</div>.*?)<div class="spdiv">\[riduci\]</div>')
for match in matches:
support.log(match)
blocks = scrapertoolsV2.find_multiple_matches(match, '(?:<p>)(.*?)(?:</p>|<br)')
season = scrapertoolsV2.find_single_match(match, r'title="Espandi">.*?STAGIONE\s+\d+([^<>]+)').strip()
for block in blocks:
episode = scrapertoolsV2.find_single_match(block, r'([0-9]+(?:&#215;|×)[0-9]+)').strip()
seasons_n = scrapertoolsV2.find_single_match(block, r'<strong>STAGIONE\s+\d+([^<>]+)').strip()
if seasons_n:
season = seasons_n
if not episode: continue
season = re.sub(r'&#8211;|', "-", season)
itemlist.append(
Item(channel=item.channel,
action="findvideos",
contentType='episode',
title="[B]" + episode + "[/B] " + season,
fulltitle=episode + " " + season,
show=episode + " " + season,
url=block,
extra=item.extra,
thumbnail=item.thumbnail,
infoLabels=item.infoLabels
))
support.videolibrary(itemlist, item)
return itemlist
def findvideos(item): def findvideos(item):
+1 -1
View File
@@ -43,7 +43,7 @@ def mainlist(item):
return locals() return locals()
def genres(item): def genres(item):
return support.scrape2(item, patronBlock=r'<div id="bordobar" class="dropdown-menu(.*?)</li>', patron=r'<a class="dropdown-item" href="([^"]+)" title="([A-z]+)"', listGroups=['url', 'title'], action='video') return support.scrape2(item, patronBlock=r'<div id="bordobar" class="dropdown-menu(?P<block>.*)</li>', patron=r'<a class="dropdown-item" href="([^"]+)" title="([A-z]+)"', listGroups=['url', 'title'], action='video')
def peliculas(item): def peliculas(item):
+1 -1
View File
@@ -208,7 +208,7 @@ def findvideos(item):
itemlist = [] itemlist = []
# data = httptools.downloadpage(item.url, headers=headers).data # data = httptools.downloadpage(item.url, headers=headers).data
patronBlock = '<div class="entry-content">(.*?)<footer class="entry-footer">' patronBlock = '<div class="entry-content">(?P<block>.*)<footer class="entry-footer">'
# bloque = scrapertools.find_single_match(data, patronBlock) # bloque = scrapertools.find_single_match(data, patronBlock)
patron = r'<a href="([^"]+)">' patron = r'<a href="([^"]+)">'
+1 -1
View File
@@ -75,7 +75,7 @@ def newest(categoria):
def genre(item): def genre(item):
patronMenu = '<a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a>' patronMenu = '<a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a>'
blacklist = ['Serie TV','Serie TV Americane','Serie TV Italiane','altadefinizione'] blacklist = ['Serie TV','Serie TV Americane','Serie TV Italiane','altadefinizione']
patronBlock = '<ul class="sub-menu">(.*?)</ul>' patronBlock = '<ul class="sub-menu">(?P<block>.*)</ul>'
return locals() return locals()
+4 -4
View File
@@ -41,17 +41,17 @@ def peliculas(item):
if item.args == 'search': if item.args == 'search':
patron = r'<h2 class="entry-title"><a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>' patron = r'<h2 class="entry-title"><a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>'
elif item.args == 'last': elif item.args == 'last':
patronBlock = 'Aggiornamenti</h2>(.*?)</ul>' patronBlock = 'Aggiornamenti</h2>(?P<block>.*)</ul>'
patron = '<a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a>' patron = '<a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a>'
elif item.args == 'most_view': elif item.args == 'most_view':
patronBlock = 'I piu visti</h2>(.*?)</ul>' patronBlock = 'I piu visti</h2>(?P<block>.*)</ul>'
patron = '<a href="(?P<url>[^"]+)" title="(?P<title>[^"]+)"' patron = '<a href="(?P<url>[^"]+)" title="(?P<title>[^"]+)"'
elif item.args == 'new': elif item.args == 'new':
patronBlock = '<main[^>]+>(.*?)</main>' patronBlock = '<main[^>]+>(?P<block>.*)</main>'
patron = '<a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>[^>]+>[^>]+>[^>]+><img.*?src="(?P<thumb>[^"]+)".*?<p>(?P<plot>[^<]+)</p>' patron = '<a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>[^>]+>[^>]+>[^>]+><img.*?src="(?P<thumb>[^"]+)".*?<p>(?P<plot>[^<]+)</p>'
patronNext = '<a class="next page-numbers" href="([^"]+)">' patronNext = '<a class="next page-numbers" href="([^"]+)">'
else: else:
patronBlock = '"lcp_catlist"[^>]+>(.*?)</ul>' patronBlock = '"lcp_catlist"[^>]+>(?P<block>.*)</ul>'
patron = r'<li ><a href="(?P<url>[^"]+)" title="[^>]+">(?P<title>[^<|\(]+)?(?:\([^\d]*(?P<year>\d+)\))?[^<]*</a>' patron = r'<li ><a href="(?P<url>[^"]+)" title="[^>]+">(?P<title>[^<|\(]+)?(?:\([^\d]*(?P<year>\d+)\))?[^<]*</a>'
action = 'findvideos' if item.contentType == 'movie' else 'check' action = 'findvideos' if item.contentType == 'movie' else 'check'
+183 -165
View File
@@ -130,7 +130,7 @@ def regexDbg(item, patron, headers, data=''):
'regex': patron, 'regex': patron,
'flags': 'gm', 'flags': 'gm',
'testString': html, 'testString': html,
'delimiter': '"', 'delimiter': '"""',
'flavor': 'python' 'flavor': 'python'
} }
r = urllib2.Request(url + '/api/regex', json.dumps(data), headers=headers) r = urllib2.Request(url + '/api/regex', json.dumps(data), headers=headers)
@@ -141,15 +141,138 @@ def regexDbg(item, patron, headers, data=''):
def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="", def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="",
patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}): patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}):
m = re.search(r'\((?!\?)', patron) m = re.search(r'(?<!\\|\[)\((?!\?)', patron)
n = 0 n = 0
while m: while m:
patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():] patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():]
m = re.search(r'\((?!\?)', patron) m = re.search(r'(?<!\\|\[)\((?!\?)', patron)
n += 1 n += 1
regexDbg(item, patron, headers) regexDbg(item, patron, headers)
def scrapeLang(scraped, lang, longtitle):
    """Resolve the language tag for a scraped entry and append it to its title.

    Added to handle sites that list both ITA and Sub-ITA videos of a TV
    series on the same page.

    Parameters:
        scraped: mapping that must contain a 'lang' key holding the raw
            language text captured by the regex (may be empty/None).
        lang: language carried over from the previous entry; it is kept
            when ``scraped['lang']`` is empty.
        longtitle: display title built so far.

    Returns:
        A ``(lang, longtitle)`` tuple. ``lang`` is 'Sub-ITA' when the raw
        text contains "sub" (case-insensitive), 'ITA' for any other
        non-empty raw text, otherwise the incoming value. ``longtitle``
        gets the formatted language appended whenever ``lang`` is not ''.
    """
    raw_lang = scraped['lang']
    if raw_lang:
        lang = 'Sub-ITA' if 'sub' in raw_lang.lower() else 'ITA'

    # Nothing to tag: hand the title back untouched.
    if lang == '':
        return lang, longtitle

    longtitle += typo(lang, '_ [] color kod')
    return lang, longtitle
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, pag):
    """Turn every ``patron`` match found in ``block`` into an Item.

    Parameters:
        item: the Item being scraped. Its channel, url, args, infoLabels,
            contentType, fulltitle and show are read; contentType may be
            overwritten when ``typeContentDict`` matches.
        args: dict of options coming from the decorated channel function;
            only the optional 'itemHook' callable is used here.
        block: text the regex is applied to.
        patron: regex with named groups (see ``known_keys`` below); any
            other named group is copied onto the Item as an attribute.
        headers: forwarded to ``regexDbg`` when ``debug`` is truthy.
        action: default action assigned to the created Items.
        pagination: number of entries per page; falsy disables paging.
        debug: when truthy the pattern is sent to ``regexDbg``.
        typeContentDict: mapping ``contentType -> variants`` checked against
            the captured 'type' group.
        typeActionDict: mapping ``action -> variants`` checked against the
            captured 'type' group.
        blacklist: titles that should be skipped.
        pag: 1-based page number used together with ``pagination``.

    Returns:
        ``(itemlist, matches)`` where ``matches`` is the complete list of
        regex matches (all pages), so the caller can decide whether a
        "next page" entry is needed.
    """
    itemlist = []
    matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
    log('MATCHES =', matches)

    if debug:
        regexDbg(item, patron, headers, block)

    known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
                  'rating', 'type', 'lang']  # 'episode' added by greko
    # Carried across iterations: handles TV-series pages that mix ITA and Sub-ITA videos.
    lang = ''

    for i, match in enumerate(matches):
        if pagination and (pag - 1) * pagination > i:
            continue  # entry belongs to a previous page
        if pagination and i >= pag * pagination:
            break  # entry belongs to a following page

        # list() keeps .index() working when keys()/values() are views.
        listGroups = list(match.keys())
        values = list(match.values())

        if len(listGroups) > len(values):  # to fix a bug
            values.extend([''] * (len(listGroups) - len(values)))

        scraped = {}
        for kk in known_keys:
            val = values[listGroups.index(kk)] if kk in listGroups else ''
            # Relative url/thumb: prefix with the scheme+host of the page being scraped.
            if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
                val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
            scraped[kk] = val

        if scraped['title']:
            # greko's fix: double quotes become single quotes.
            # NOTE(review): the dash literal was lost in the reviewed text (it showed ''); an en dash is
            # restored here on the evidence of the '&#8211' entity handled below - confirm against the file.
            title = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title'])
                                             .replace('"', "'").replace('×', 'x').replace('–', '-')).strip()
        else:
            title = ''
        plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))

        longtitle = typo(title, 'bold')
        if scraped['quality']:
            longtitle = longtitle + typo(scraped['quality'], '_ [] color kod')
        if scraped['episode']:
            scraped['episode'] = re.sub(r'\s-\s|-|x|&#8211|&#215;', 'x', scraped['episode'])
            longtitle = typo(scraped['episode'] + ' - ', 'bold') + longtitle
        if scraped['title2']:
            title2 = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title2'])
                                              .replace('"', "'").replace('×', 'x').replace('–', '-')).strip()
            longtitle = longtitle + typo(title2, 'bold _ -- _')

        # Fix: the result used to be bound to a misspelled name, so the language tag never reached the title.
        lang, longtitle = scrapeLang(scraped, lang, longtitle)

        # if title is set, probably this is a list of episodes or video sources
        if item.infoLabels["title"]:
            infolabels = item.infoLabels
        else:
            infolabels = {}
            if scraped['year']:
                infolabels['year'] = scraped['year']
            if scraped["plot"]:
                infolabels['plot'] = plot
            if scraped['duration']:
                # Fix: use a dedicated name so the regex ``matches`` returned to the caller is not clobbered.
                duration_parts = scrapertoolsV2.find_multiple_matches(
                    scraped['duration'], r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
                if duration_parts:
                    hours, minutes = duration_parts[-1]  # as before, the last h/m pair wins
                    scraped['duration'] = int(hours) * 60 + int(minutes)
                else:
                    scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
                try:
                    infolabels['duration'] = int(scraped['duration']) * 60
                except (TypeError, ValueError):
                    # No digits at all in the captured text: leave the duration unset instead of
                    # aborting the whole scrape.
                    pass
            if scraped['genere']:
                genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
                infolabels['genere'] = ", ".join(genres)
            if scraped["rating"]:
                infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])

        if typeContentDict:
            for name, variants in typeContentDict.items():
                if scraped['type'] in variants:
                    item.contentType = name
        if typeActionDict:
            for name, variants in typeActionDict.items():
                if scraped['type'] in variants:
                    action = name

        # NOTE(review): the ``or longtitle`` arm lets entries through whenever the formatted title is
        # non-empty, which may bypass the blacklist - confirm this is intended.
        if (scraped["title"] and scraped["title"] not in blacklist) or longtitle:
            it = Item(
                channel=item.channel,
                action=action,
                contentType='episode' if (
                    action == 'findvideos' and item.contentType == 'tvshow') else item.contentType,
                title=longtitle,
                fulltitle=item.fulltitle if (action == 'findvideos' and item.contentType != 'movie') else title,
                show=item.show if (action == 'findvideos' and item.contentType != 'movie') else title,
                quality=scraped["quality"],
                url=scraped["url"],
                infoLabels=infolabels,
                thumbnail=scraped["thumb"],
                args=item.args,
                contentSerieName=title if (action == 'episodios' and item.contentType != 'movie') else '',
                contentLanguage=lang
            )

            # Any named group outside known_keys becomes an attribute of the Item.
            for lg in list(set(listGroups).difference(known_keys)):
                setattr(it, lg, values[listGroups.index(lg)])

            if 'itemHook' in args:
                it = args['itemHook'](it)
            itemlist.append(it)

    return itemlist, matches
def scrape(func): def scrape(func):
# args is a dict containing the following keys: # args is a dict containing the following keys:
@@ -188,7 +311,7 @@ def scrape(func):
action = args['action'] if 'action' in args else 'findvideos' action = args['action'] if 'action' in args else 'findvideos'
anime = args['anime'] if 'anime' in args else '' anime = args['anime'] if 'anime' in args else ''
addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True
blacklist = args['blacklist'] if 'blacklist' in args else '' blacklist = args['blacklist'] if 'blacklist' in args else []
data = args['data'] if 'data' in args else '' data = args['data'] if 'data' in args else ''
patron = args['patron'] if 'patron' in args else args['patronMenu'] if 'patronMenu' in args else '' patron = args['patron'] if 'patron' in args else args['patronMenu'] if 'patronMenu' in args else ''
headers = args['headers'] if 'headers' in args else func.__globals__['headers'] headers = args['headers'] if 'headers' in args else func.__globals__['headers']
@@ -200,6 +323,9 @@ def scrape(func):
if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20 if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20
else: pagination = '' else: pagination = ''
pag = item.page if item.page else 1 # pagination
matches = []
log('PATRON= ', patron) log('PATRON= ', patron)
if not data: if not data:
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"') data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
@@ -207,175 +333,67 @@ def scrape(func):
# replace all ' with " and eliminate newline, so we don't need to worry about # replace all ' with " and eliminate newline, so we don't need to worry about
log('DATA =', data) log('DATA =', data)
block = data if patronBlock:
blocks = scrapertoolsV2.find_multiple_matches_groups(data, patronBlock)
block = ""
for bl in blocks:
blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug,
typeContentDict, typeActionDict, blacklist, pag)
for it in blockItemlist:
if 'lang' in bl:
it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title)
if 'quality' in bl and bl['quality']:
it.quality = bl['quality']
it.title = it.title + typo(bl['quality'], '_ [] color kod')
log('BLOCK ', '=', block)
itemlist.extend(blockItemlist)
matches.extend(blockMatches)
elif patron:
itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict,
typeActionDict, blacklist, pag)
if patronBlock: checkHost(item, itemlist)
if type(patronBlock) == str:
patronBlock = [patronBlock]
for n, regex in enumerate(patronBlock): if (item.contentType == "tvshow" and (action != "findvideos" and action != "play")) \
blocks = scrapertoolsV2.find_multiple_matches(block, regex) or (item.contentType == "episode" and action != "play") \
block = "" or (item.contentType == "movie" and action != "play") :
for b in blocks: tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
block += "\n" + str(b) # else: # Si perde item show :(
log('BLOCK ', n, '=', block) # for it in itemlist:
else: # it.infoLabels = item.infoLabels
block = data
if patron:
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
log('MATCHES =', matches)
if debug: if 'itemlistHook' in args:
regexDbg(item, patron, headers, block) itemlist = args['itemlistHook'](itemlist)
known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', if patronNext:
'rating', 'type', 'lang'] # by greko aggiunto episode nextPage(itemlist, item, data, patronNext, 2)
lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
pag = item.page if item.page else 1 # pagination
for i, match in enumerate(matches): # next page for pagination
if pagination and (pag - 1) * pagination > i: continue # pagination if pagination and len(matches) >= pag * pagination:
if pagination and i >= pag * pagination: break # pagination itemlist.append(
listGroups = match.keys() Item(channel=item.channel,
match = match.values() action = item.action,
contentType=item.contentType,
title=typo(config.get_localized_string(30992), 'color kod bold'),
url=item.url,
args=item.args,
page=pag + 1,
thumbnail=thumb()))
if len(listGroups) > len(match): # to fix a bug if anime:
match = list(match) from specials import autorenumber
match.extend([''] * (len(listGroups) - len(match))) if inspect.stack()[1][3] == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
else: autorenumber.renumber(itemlist)
scraped = {} if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
for kk in known_keys: item.fulltitle = item.infoLabels["title"]
val = match[listGroups.index(kk)] if kk in listGroups else '' videolibrary(itemlist, item)
if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
scraped[kk] = val
if scraped['title']: if 'patronMenu' in args:
title = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title']) itemlist = thumb(itemlist, genre=True)
.replace('"',"'").replace('×', 'x').replace('','-')).strip() # fix by greko da " a '
else:
title = ''
plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))
longtitle = typo(title, 'bold') if 'fullItemlistHook' in args:
if scraped['quality']: longtitle = longtitle + typo(scraped['quality'], '_ [] color kod') itemlist = args['fullItemlistHook'](itemlist)
if scraped['episode']:
scraped['episode'] = re.sub(r'\s-\s|-|x|&#8211', 'x', scraped['episode'])
longtitle = typo(scraped['episode'] + ' - ', 'bold') + longtitle
if scraped['title2']:
title2 = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title2'])
.replace('"', "'").replace('×', 'x').replace('','-')).strip()
longtitle = longtitle + typo(title2, 'bold _ -- _')
## Aggiunto/modificato per gestire i siti che hanno i video
## in ita e subita delle serie tv nella stessa pagina
if scraped['lang']:
if 'sub' in scraped['lang'].lower():
lang = 'Sub-ITA'
else:
lang = 'ITA'
if lang != '':
longtitle += typo(lang, '_ [] color kod')
# if title is set, probably this is a list of episodes or video sources
if item.infoLabels["title"] or item.fulltitle:
infolabels = item.infoLabels
else:
infolabels = {}
if scraped['year']:
infolabels['year'] = scraped['year']
if scraped["plot"]:
infolabels['plot'] = plot
if scraped['duration']:
matches = scrapertoolsV2.find_multiple_matches(scraped['duration'],
r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
for h, m in matches:
scraped['duration'] = int(h) * 60 + int(m)
if not matches:
scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
infolabels['duration'] = int(scraped['duration']) * 60
if scraped['genere']:
genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
infolabels['genere'] = ", ".join(genres)
if scraped["rating"]:
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
if typeContentDict:
for name, variants in typeContentDict.items():
if scraped['type'] in variants:
item.contentType = name
if typeActionDict:
for name, variants in typeActionDict.items():
if scraped['type'] in variants:
action = name
if scraped["title"]:
if scraped["title"] not in blacklist:
it = Item(
channel=item.channel,
action=action,
contentType= 'episode' if (action == 'findvideos' and item.contentType == 'tvshow') else item.contentType,
title=longtitle,
fulltitle=item.fulltitle if (action == 'findvideos' and item.contentType != 'movie') else title,
show=item.show if (action == 'findvideos' and item.contentType != 'movie') else title,
quality=scraped["quality"],
url=scraped["url"],
infoLabels=infolabels,
thumbnail=scraped["thumb"],
args=item.args,
contentSerieName = title if (action == 'episodios' and item.contentType != 'movie') else ''
)
for lg in list(set(listGroups).difference(known_keys)):
it.__setattr__(lg, match[listGroups.index(lg)])
if 'itemHook' in args:
it = args['itemHook'](it)
itemlist.append(it)
checkHost(item, itemlist)
if (item.contentType == "tvshow" and (action != "findvideos" and action != "play")) \
or (item.contentType == "episode" and action != "play") \
or (item.contentType == "movie" and action != "play") :
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
# else: # Si perde item show :(
# for it in itemlist:
# it.infoLabels = item.infoLabels
if 'itemlistHook' in args:
itemlist = args['itemlistHook'](itemlist)
if patronNext:
nextPage(itemlist, item, data, patronNext, 2)
# next page for pagination
if pagination and len(matches) >= pag * pagination:
itemlist.append(
Item(channel=item.channel,
action = item.action,
contentType=item.contentType,
title=typo(config.get_localized_string(30992), 'color kod bold'),
url=item.url,
args=item.args,
page=pag + 1,
thumbnail=thumb()))
if anime:
from specials import autorenumber
if inspect.stack()[1][3] == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
else: autorenumber.renumber(itemlist)
if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
item.fulltitle = item.infoLabels["title"]
videolibrary(itemlist, item)
if 'patronMenu' in args:
itemlist = thumb(itemlist, genre=True)
if 'fullItemlistHook' in args:
itemlist = args['fullItemlistHook'](itemlist)
return itemlist return itemlist
@@ -604,7 +622,7 @@ def menu(func):
args=var[2] if len(var) > 2 else '', args=var[2] if len(var) > 2 else '',
contentType= var[3] if len(var) > 3 else 'movie',) contentType= var[3] if len(var) > 3 else 'movie',)
# add search menu for category # add search menu for category
if 'search' not in args: menuItem(itemlist, filename, 'Cerca ' + title + '… submenu bold', 'search', host, args=name) if 'search' not in args: menuItem(itemlist, filename, 'Cerca ' + title + '… submenu bold', 'search', host + url, args=name)
# Make EXTRA MENU (on bottom) # Make EXTRA MENU (on bottom)
for name, var in args.items(): for name, var in args.items():