migliorie a support
This commit is contained in:
@@ -45,9 +45,9 @@ def peliculas(item):
|
||||
|
||||
action="findvideos"
|
||||
if item.args == "search":
|
||||
patronBlock = r'</script> <div class="boxgrid caption">(.*?)<div id="right_bar">'
|
||||
patronBlock = r'</script> <div class="boxgrid caption">(?P<block>.*)<div id="right_bar">'
|
||||
else:
|
||||
patronBlock = r'<div class="cover_kapsul ml-mask">(.*?)<div class="page_nav">'
|
||||
patronBlock = r'<div class="cover_kapsul ml-mask">(?P<block>.*)<div class="page_nav">'
|
||||
patron = r'<div class="cover boxcaption"> <h2>.<a href="(?P<url>[^"]+)">.*?<.*?src="(?P<thumb>[^"]+)"'\
|
||||
'.+?[^>]+>[^>]+<div class="trdublaj"> (?P<quality>[A-Z]+)<[^>]+>(?:.[^>]+>(?P<lang>.*?)<[^>]+>).*?'\
|
||||
'<p class="h4">(?P<title>.*?)</p>[^>]+> [^>]+> [^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> [^>]+> '\
|
||||
@@ -69,13 +69,13 @@ def categorie(item):
|
||||
blacklist = 'altadefinizione01'
|
||||
|
||||
if item.args == 'genres':
|
||||
patronBlock = r'<ul class="kategori_list">(.*?)</ul>'
|
||||
patronBlock = r'<ul class="kategori_list">(?P<block>.*)</ul>'
|
||||
patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
|
||||
elif item.args == 'years':
|
||||
patronBlock = r'<ul class="anno_list">(.*?)</ul>'
|
||||
patronBlock = r'<ul class="anno_list">(?P<block>.*)</ul>'
|
||||
patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
|
||||
elif item.args == 'orderalf':
|
||||
patronBlock = r'<div class="movies-letter">(.*)<div class="clearfix">'
|
||||
patronBlock = r'<div class="movies-letter">(?P<block>.*)<div class="clearfix">'
|
||||
patron = '<a title=.*?href="(?P<url>[^"]+)"><span>(?P<title>.*?)</span>'
|
||||
|
||||
return locals()
|
||||
|
||||
@@ -41,9 +41,9 @@ def peliculas(item):
|
||||
|
||||
action="findvideos"
|
||||
if item.args == "search":
|
||||
patronBlock = r'</script> <div class="boxgrid caption">(.*?)<div id="right_bar">'
|
||||
patronBlock = r'</script> <div class="boxgrid caption">(?P<block>.*)<div id="right_bar">'
|
||||
else:
|
||||
patronBlock = r'<div class="cover_kapsul ml-mask">(.*?)<div class="page_nav">'
|
||||
patronBlock = r'<div class="cover_kapsul ml-mask">(?P<block>.*)<div class="page_nav">'
|
||||
patron = r'<div class="cover boxcaption"> <h2>.<a href="(?P<url>[^"]+)">.*?<.*?src="(?P<thumb>[^"]+)"'\
|
||||
'.+?[^>]+>[^>]+<div class="trdublaj"> (?P<quality>[A-Z]+)<[^>]+>(?:.[^>]+>(?P<lang>.*?)<[^>]+>).*?'\
|
||||
'<p class="h4">(?P<title>.*?)</p>[^>]+> [^>]+> [^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> [^>]+> '\
|
||||
@@ -65,10 +65,10 @@ def categorie(item):
|
||||
blacklist = 'Altadefinizione01'
|
||||
|
||||
if item.args == 'genres':
|
||||
patronBlock = r'<ul class="kategori_list">(.*?)</ul>'
|
||||
patronBlock = r'<ul class="kategori_list">(?P<block>.*)</ul>'
|
||||
patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
|
||||
elif item.args == 'years':
|
||||
patronBlock = r'<ul class="anno_list">(.*?)</ul>'
|
||||
patronBlock = r'<ul class="anno_list">(?P<block>.*)</ul>'
|
||||
patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
|
||||
elif item.args == 'orderalf':
|
||||
patronBlock = r'<div class="movies-letter">(.*)<div class="clearfix">'
|
||||
|
||||
@@ -60,13 +60,13 @@ def genres(item):
|
||||
|
||||
action = 'peliculas'
|
||||
if item.args == 'genres':
|
||||
patronBlock = r'<ul class="listSubCat" id="Film">(.*?)</ul>'
|
||||
patronBlock = r'<ul class="listSubCat" id="Film">(?P<block>.*)</ul>'
|
||||
elif item.args == 'years':
|
||||
patronBlock = r'<ul class="listSubCat" id="Anno">(.*?)</ul>'
|
||||
patronBlock = r'<ul class="listSubCat" id="Anno">(?P<block>.*)</ul>'
|
||||
elif item.args == 'quality':
|
||||
patronBlock = r'<ul class="listSubCat" id="Qualita">(.*?)</ul>'
|
||||
patronBlock = r'<ul class="listSubCat" id="Qualita">(?P<block>.*)</ul>'
|
||||
elif item.args == 'lucky': # sono i titoli random nella pagina, cambiano 1 volta al dì
|
||||
patronBlock = r'FILM RANDOM.*?class="listSubCat">(.*?)</ul>'
|
||||
patronBlock = r'FILM RANDOM.*?class="listSubCat">(?P<block>.*)</ul>'
|
||||
action = 'findvideos'
|
||||
|
||||
patron = r'<li><a href="(?P<url>[^"]+)">(?P<title>[^<]+)<'
|
||||
|
||||
@@ -37,7 +37,7 @@ def menu(item):
|
||||
|
||||
action='peliculas'
|
||||
patron = r'<li><a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a></li>'
|
||||
patronBlock= r'<ul class="listSubCat" id="'+ str(item.args) + '">(.*?)</ul>'
|
||||
patronBlock= r'<ul class="listSubCat" id="'+ str(item.args) + '">(?P<block>.*)</ul>'
|
||||
|
||||
return locals()
|
||||
|
||||
|
||||
+26
-96
@@ -44,10 +44,9 @@ def mainlist(item):
|
||||
('Anni', ['', 'menu', 'Film per Anno'])
|
||||
]
|
||||
tvshow = ['/serietv/',
|
||||
('Aggiornamenti serie tv', ['/serietv/aggiornamento-quotidiano-serie-tv/', 'last']),
|
||||
('Per Lettera', ['/serietv/', 'menu', 'Serie-Tv per Lettera']),
|
||||
('Per Genere', ['/serietv/aggiornamento-quotidiano-serie-tv/', 'menu', 'Serie-Tv per Genere']),
|
||||
('Per anno', ['/serietv/aggiornamento-quotidiano-serie-tv/', 'menu', 'Serie-Tv per Anno'])
|
||||
('Per Genere', ['/serietv/', 'menu', 'Serie-Tv per Genere']),
|
||||
('Per anno', ['/serietv/', 'menu', 'Serie-Tv per Anno'])
|
||||
]
|
||||
|
||||
return locals()
|
||||
@@ -56,76 +55,39 @@ def mainlist(item):
|
||||
@support.scrape
|
||||
def menu(item):
|
||||
findhost()
|
||||
patronBlock = item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>'
|
||||
patronBlock = item.args + r'<span.*?><\/span>.*?<ul.*?>(?P<block>.*?)<\/ul>'
|
||||
patronMenu = r'href="?(?P<url>[^">]+)"?>(?P<title>.*?)<\/a>'
|
||||
action = 'peliculas'
|
||||
|
||||
return locals()
|
||||
|
||||
|
||||
@support.scrape
|
||||
def newest(categoria):
|
||||
findhost()
|
||||
itemlist = []
|
||||
debug = True
|
||||
item = Item()
|
||||
item.contentType = 'movie'
|
||||
item.url = host + '/lista-film-ultimi-100-film-aggiunti/'
|
||||
return support.scrape(item, r'<a href=([^>]+)>([^<([]+)(?:\[([A-Z]+)\])?\s\(([0-9]{4})\)<\/a>',
|
||||
['url', 'title', 'quality', 'year'],
|
||||
patronBlock=r'Ultimi 100 film aggiunti:.*?<\/td>')
|
||||
patron = "<a href=(?P<url>[^>]+)>(?P<title>[^<([]+)(?:\[(?P<quality>[A-Z]+)\])?\s\((?P<year>[0-9]{4})\)<\/a>"
|
||||
patronBlock = r'Ultimi 100 film aggiunti:.*?<\/td>'
|
||||
|
||||
return locals()
|
||||
|
||||
|
||||
def last(item):
|
||||
support.log()
|
||||
|
||||
itemlist = []
|
||||
infoLabels = {}
|
||||
quality = ''
|
||||
PERPAGE = 20
|
||||
page = 1
|
||||
if item.page:
|
||||
page = item.page
|
||||
def search(item, text):
|
||||
support.log(item.url, "search", text)
|
||||
|
||||
if item.contentType == 'tvshow':
|
||||
matches = support.match(item, r'<a href="([^">]+)".*?>([^(:(|[)]+)([^<]+)<\/a>', '<article class="sequex-post-content.*?</article>', headers)[0]
|
||||
else:
|
||||
matches = support.match(item, r'<a href=([^>]+)>([^(:(|[)]+)([^<]+)<\/a>', r'<strong>Ultimi 100 film Aggiornati:<\/a><\/strong>(.*?)<td>', headers)[0]
|
||||
try:
|
||||
item.url = item.url + "/?s=" + text.replace(' ', '+')
|
||||
return peliculas(item)
|
||||
|
||||
for i, (url, title, info) in enumerate(matches):
|
||||
if (page - 1) * PERPAGE > i: continue
|
||||
if i >= page * PERPAGE: break
|
||||
add = True
|
||||
title = title.rstrip()
|
||||
if item.contentType == 'tvshow':
|
||||
for i in itemlist:
|
||||
if i.url == url: # togliamo i doppi
|
||||
add = False
|
||||
else:
|
||||
infoLabels['year'] = scrapertoolsV2.find_single_match(info, r'\(([0-9]+)\)')
|
||||
quality = scrapertoolsV2.find_single_match(info, r'\[([A-Z]+)\]')
|
||||
|
||||
if quality:
|
||||
longtitle = title + support.typo(quality,'_ [] color kod')
|
||||
else:
|
||||
longtitle = title
|
||||
|
||||
if add:
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
action='findvideos' if item.contentType == 'movie' else 'episodios',
|
||||
contentType=item.contentType,
|
||||
title=longtitle,
|
||||
fulltitle=title,
|
||||
show=title,
|
||||
quality=quality,
|
||||
url=url,
|
||||
infoLabels=infoLabels
|
||||
)
|
||||
)
|
||||
support.pagination(itemlist, item, page, PERPAGE)
|
||||
|
||||
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
|
||||
|
||||
return itemlist
|
||||
# Continua la ricerca in caso di errore
|
||||
except:
|
||||
import sys
|
||||
for line in sys.exc_info():
|
||||
logger.error("%s" % line)
|
||||
return []
|
||||
|
||||
|
||||
@support.scrape
|
||||
@@ -137,51 +99,19 @@ def peliculas(item):
|
||||
patron = r'div class="card-image">.*?<img src="(?P<thumb>[^ ]+)" alt.*?<a href="(?P<url>[^ >]+)">(?P<title>[^<[(]+)<\/a>.*?<strong><span style="[^"]+">(?P<genre>[^<>0-9(]+)\((?P<year>[0-9]{4}).*?</(?:p|div)>(?P<plot>.*?)</div'
|
||||
action = 'episodios'
|
||||
|
||||
patronBlock=[r'<div class="?sequex-page-left"?>(.*?)<aside class="?sequex-page-right"?>',
|
||||
'<div class="?card-image"?>.*?(?=<div class="?card-image"?>|<div class="?rating"?>)']
|
||||
# patronBlock=[r'<div class="?sequex-page-left"?>(?P<block>.*?)<aside class="?sequex-page-right"?>',
|
||||
# '<div class="?card-image"?>.*?(?=<div class="?card-image"?>|<div class="?rating"?>)']
|
||||
patronNext='<a class="?page-link"? href="?([^>]+)"?><i class="fa fa-angle-right">'
|
||||
|
||||
return locals()
|
||||
|
||||
|
||||
@support.scrape
|
||||
def episodios(item):
|
||||
itemlist = []
|
||||
patronBlock = r'(?P<block><div class="sp-head[a-z ]*?" title="Espandi">\s*STAGIONE [0-9]+ - (?P<lang>[^\s]+)(?: - (?P<quality>[^-<]+))?.*?[^<>]*?</div>.*?)<div class="spdiv">\[riduci\]</div>'
|
||||
patron = '(?:<p>)(?P<episode>[0-9]+(?:×|×)[0-9]+)(?P<url>.*?)(?:</p>|<br)'
|
||||
|
||||
data = httptools.downloadpage(item.url).data
|
||||
matches = scrapertoolsV2.find_multiple_matches(data,
|
||||
r'(<div class="sp-head[a-z ]*?" title="Espandi">[^<>]*?</div>.*?)<div class="spdiv">\[riduci\]</div>')
|
||||
|
||||
for match in matches:
|
||||
support.log(match)
|
||||
blocks = scrapertoolsV2.find_multiple_matches(match, '(?:<p>)(.*?)(?:</p>|<br)')
|
||||
season = scrapertoolsV2.find_single_match(match, r'title="Espandi">.*?STAGIONE\s+\d+([^<>]+)').strip()
|
||||
|
||||
for block in blocks:
|
||||
episode = scrapertoolsV2.find_single_match(block, r'([0-9]+(?:×|×)[0-9]+)').strip()
|
||||
seasons_n = scrapertoolsV2.find_single_match(block, r'<strong>STAGIONE\s+\d+([^<>]+)').strip()
|
||||
|
||||
if seasons_n:
|
||||
season = seasons_n
|
||||
|
||||
if not episode: continue
|
||||
|
||||
season = re.sub(r'–|–', "-", season)
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
action="findvideos",
|
||||
contentType='episode',
|
||||
title="[B]" + episode + "[/B] " + season,
|
||||
fulltitle=episode + " " + season,
|
||||
show=episode + " " + season,
|
||||
url=block,
|
||||
extra=item.extra,
|
||||
thumbnail=item.thumbnail,
|
||||
infoLabels=item.infoLabels
|
||||
))
|
||||
|
||||
support.videolibrary(itemlist, item)
|
||||
|
||||
return itemlist
|
||||
return locals()
|
||||
|
||||
|
||||
def findvideos(item):
|
||||
|
||||
@@ -43,7 +43,7 @@ def mainlist(item):
|
||||
return locals()
|
||||
|
||||
def genres(item):
|
||||
return support.scrape2(item, patronBlock=r'<div id="bordobar" class="dropdown-menu(.*?)</li>', patron=r'<a class="dropdown-item" href="([^"]+)" title="([A-z]+)"', listGroups=['url', 'title'], action='video')
|
||||
return support.scrape2(item, patronBlock=r'<div id="bordobar" class="dropdown-menu(?P<block>.*)</li>', patron=r'<a class="dropdown-item" href="([^"]+)" title="([A-z]+)"', listGroups=['url', 'title'], action='video')
|
||||
|
||||
|
||||
def peliculas(item):
|
||||
|
||||
@@ -208,7 +208,7 @@ def findvideos(item):
|
||||
itemlist = []
|
||||
|
||||
# data = httptools.downloadpage(item.url, headers=headers).data
|
||||
patronBlock = '<div class="entry-content">(.*?)<footer class="entry-footer">'
|
||||
patronBlock = '<div class="entry-content">(?P<block>.*)<footer class="entry-footer">'
|
||||
# bloque = scrapertools.find_single_match(data, patronBlock)
|
||||
|
||||
patron = r'<a href="([^"]+)">'
|
||||
|
||||
+1
-1
@@ -75,7 +75,7 @@ def newest(categoria):
|
||||
def genre(item):
|
||||
patronMenu = '<a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a>'
|
||||
blacklist = ['Serie TV','Serie TV Americane','Serie TV Italiane','altadefinizione']
|
||||
patronBlock = '<ul class="sub-menu">(.*?)</ul>'
|
||||
patronBlock = '<ul class="sub-menu">(?P<block>.*)</ul>'
|
||||
|
||||
return locals()
|
||||
|
||||
|
||||
@@ -41,17 +41,17 @@ def peliculas(item):
|
||||
if item.args == 'search':
|
||||
patron = r'<h2 class="entry-title"><a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>'
|
||||
elif item.args == 'last':
|
||||
patronBlock = 'Aggiornamenti</h2>(.*?)</ul>'
|
||||
patronBlock = 'Aggiornamenti</h2>(?P<block>.*)</ul>'
|
||||
patron = '<a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a>'
|
||||
elif item.args == 'most_view':
|
||||
patronBlock = 'I piu visti</h2>(.*?)</ul>'
|
||||
patronBlock = 'I piu visti</h2>(?P<block>.*)</ul>'
|
||||
patron = '<a href="(?P<url>[^"]+)" title="(?P<title>[^"]+)"'
|
||||
elif item.args == 'new':
|
||||
patronBlock = '<main[^>]+>(.*?)</main>'
|
||||
patronBlock = '<main[^>]+>(?P<block>.*)</main>'
|
||||
patron = '<a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>[^>]+>[^>]+>[^>]+><img.*?src="(?P<thumb>[^"]+)".*?<p>(?P<plot>[^<]+)</p>'
|
||||
patronNext = '<a class="next page-numbers" href="([^"]+)">'
|
||||
else:
|
||||
patronBlock = '"lcp_catlist"[^>]+>(.*?)</ul>'
|
||||
patronBlock = '"lcp_catlist"[^>]+>(?P<block>.*)</ul>'
|
||||
patron = r'<li ><a href="(?P<url>[^"]+)" title="[^>]+">(?P<title>[^<|\(]+)?(?:\([^\d]*(?P<year>\d+)\))?[^<]*</a>'
|
||||
|
||||
action = 'findvideos' if item.contentType == 'movie' else 'check'
|
||||
|
||||
+183
-165
@@ -130,7 +130,7 @@ def regexDbg(item, patron, headers, data=''):
|
||||
'regex': patron,
|
||||
'flags': 'gm',
|
||||
'testString': html,
|
||||
'delimiter': '"',
|
||||
'delimiter': '"""',
|
||||
'flavor': 'python'
|
||||
}
|
||||
r = urllib2.Request(url + '/api/regex', json.dumps(data), headers=headers)
|
||||
@@ -141,15 +141,138 @@ def regexDbg(item, patron, headers, data=''):
|
||||
|
||||
def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="",
|
||||
patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}):
|
||||
m = re.search(r'\((?!\?)', patron)
|
||||
m = re.search(r'(?<!\\|\[)\((?!\?)', patron)
|
||||
n = 0
|
||||
while m:
|
||||
patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():]
|
||||
m = re.search(r'\((?!\?)', patron)
|
||||
m = re.search(r'(?<!\\|\[)\((?!\?)', patron)
|
||||
n += 1
|
||||
regexDbg(item, patron, headers)
|
||||
|
||||
|
||||
def scrapeLang(scraped, lang, longtitle):
|
||||
## Aggiunto/modificato per gestire i siti che hanno i video
|
||||
## in ita e subita delle serie tv nella stessa pagina
|
||||
if scraped['lang']:
|
||||
if 'sub' in scraped['lang'].lower():
|
||||
lang = 'Sub-ITA'
|
||||
else:
|
||||
lang = 'ITA'
|
||||
if lang != '':
|
||||
longtitle += typo(lang, '_ [] color kod')
|
||||
|
||||
return lang, longtitle
|
||||
|
||||
|
||||
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, pag):
|
||||
itemlist = []
|
||||
|
||||
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
|
||||
log('MATCHES =', matches)
|
||||
|
||||
if debug:
|
||||
regexDbg(item, patron, headers, block)
|
||||
|
||||
known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
|
||||
'rating', 'type', 'lang'] # by greko aggiunto episode
|
||||
lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
|
||||
|
||||
for i, match in enumerate(matches):
|
||||
if pagination and (pag - 1) * pagination > i: continue # pagination
|
||||
if pagination and i >= pag * pagination: break # pagination
|
||||
listGroups = match.keys()
|
||||
match = match.values()
|
||||
|
||||
if len(listGroups) > len(match): # to fix a bug
|
||||
match = list(match)
|
||||
match.extend([''] * (len(listGroups) - len(match)))
|
||||
|
||||
scraped = {}
|
||||
for kk in known_keys:
|
||||
val = match[listGroups.index(kk)] if kk in listGroups else ''
|
||||
if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
|
||||
val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
|
||||
scraped[kk] = val
|
||||
|
||||
if scraped['title']:
|
||||
title = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title'])
|
||||
.replace('"', "'").replace('×', 'x').replace('–',
|
||||
'-')).strip() # fix by greko da " a '
|
||||
else:
|
||||
title = ''
|
||||
|
||||
plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))
|
||||
|
||||
longtitle = typo(title, 'bold')
|
||||
if scraped['quality']: longtitle = longtitle + typo(scraped['quality'], '_ [] color kod')
|
||||
if scraped['episode']:
|
||||
scraped['episode'] = re.sub(r'\s-\s|-|x|–|×', 'x', scraped['episode'])
|
||||
longtitle = typo(scraped['episode'] + ' - ', 'bold') + longtitle
|
||||
if scraped['title2']:
|
||||
title2 = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title2'])
|
||||
.replace('"', "'").replace('×', 'x').replace('–', '-')).strip()
|
||||
longtitle = longtitle + typo(title2, 'bold _ -- _')
|
||||
|
||||
lang, longitle = scrapeLang(scraped, lang, longtitle)
|
||||
|
||||
# if title is set, probably this is a list of episodes or video sources
|
||||
if item.infoLabels["title"]:
|
||||
infolabels = item.infoLabels
|
||||
else:
|
||||
infolabels = {}
|
||||
if scraped['year']:
|
||||
infolabels['year'] = scraped['year']
|
||||
if scraped["plot"]:
|
||||
infolabels['plot'] = plot
|
||||
if scraped['duration']:
|
||||
matches = scrapertoolsV2.find_multiple_matches(scraped['duration'],
|
||||
r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
|
||||
for h, m in matches:
|
||||
scraped['duration'] = int(h) * 60 + int(m)
|
||||
if not matches:
|
||||
scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
|
||||
infolabels['duration'] = int(scraped['duration']) * 60
|
||||
if scraped['genere']:
|
||||
genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
|
||||
infolabels['genere'] = ", ".join(genres)
|
||||
if scraped["rating"]:
|
||||
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
|
||||
|
||||
if typeContentDict:
|
||||
for name, variants in typeContentDict.items():
|
||||
if scraped['type'] in variants:
|
||||
item.contentType = name
|
||||
if typeActionDict:
|
||||
for name, variants in typeActionDict.items():
|
||||
if scraped['type'] in variants:
|
||||
action = name
|
||||
|
||||
if (scraped["title"] and scraped["title"] not in blacklist) or longtitle:
|
||||
it = Item(
|
||||
channel=item.channel,
|
||||
action=action,
|
||||
contentType='episode' if (
|
||||
action == 'findvideos' and item.contentType == 'tvshow') else item.contentType,
|
||||
title=longtitle,
|
||||
fulltitle=item.fulltitle if (action == 'findvideos' and item.contentType != 'movie') else title,
|
||||
show=item.show if (action == 'findvideos' and item.contentType != 'movie') else title,
|
||||
quality=scraped["quality"],
|
||||
url=scraped["url"],
|
||||
infoLabels=infolabels,
|
||||
thumbnail=scraped["thumb"],
|
||||
args=item.args,
|
||||
contentSerieName=title if (action == 'episodios' and item.contentType != 'movie') else '',
|
||||
contentLanguage=lang
|
||||
)
|
||||
|
||||
for lg in list(set(listGroups).difference(known_keys)):
|
||||
it.__setattr__(lg, match[listGroups.index(lg)])
|
||||
|
||||
if 'itemHook' in args:
|
||||
it = args['itemHook'](it)
|
||||
itemlist.append(it)
|
||||
return itemlist, matches
|
||||
|
||||
|
||||
def scrape(func):
|
||||
# args is a dict containing the foolowing keys:
|
||||
@@ -188,7 +311,7 @@ def scrape(func):
|
||||
action = args['action'] if 'action' in args else 'findvideos'
|
||||
anime = args['anime'] if 'anime' in args else ''
|
||||
addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True
|
||||
blacklist = args['blacklist'] if 'blacklist' in args else ''
|
||||
blacklist = args['blacklist'] if 'blacklist' in args else []
|
||||
data = args['data'] if 'data' in args else ''
|
||||
patron = args['patron'] if 'patron' in args else args['patronMenu'] if 'patronMenu' in args else ''
|
||||
headers = args['headers'] if 'headers' in args else func.__globals__['headers']
|
||||
@@ -200,6 +323,9 @@ def scrape(func):
|
||||
if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20
|
||||
else: pagination = ''
|
||||
|
||||
pag = item.page if item.page else 1 # pagination
|
||||
matches = []
|
||||
|
||||
log('PATRON= ', patron)
|
||||
if not data:
|
||||
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
|
||||
@@ -207,175 +333,67 @@ def scrape(func):
|
||||
# replace all ' with " and eliminate newline, so we don't need to worry about
|
||||
log('DATA =', data)
|
||||
|
||||
block = data
|
||||
if patronBlock:
|
||||
blocks = scrapertoolsV2.find_multiple_matches_groups(data, patronBlock)
|
||||
block = ""
|
||||
for bl in blocks:
|
||||
blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug,
|
||||
typeContentDict, typeActionDict, blacklist, pag)
|
||||
for it in blockItemlist:
|
||||
if 'lang' in bl:
|
||||
it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title)
|
||||
if 'quality' in bl and bl['quality']:
|
||||
it.quality = bl['quality']
|
||||
it.title = it.title + typo(bl['quality'], '_ [] color kod')
|
||||
log('BLOCK ', '=', block)
|
||||
itemlist.extend(blockItemlist)
|
||||
matches.extend(blockMatches)
|
||||
elif patron:
|
||||
itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict,
|
||||
typeActionDict, blacklist, pag)
|
||||
|
||||
if patronBlock:
|
||||
if type(patronBlock) == str:
|
||||
patronBlock = [patronBlock]
|
||||
checkHost(item, itemlist)
|
||||
|
||||
for n, regex in enumerate(patronBlock):
|
||||
blocks = scrapertoolsV2.find_multiple_matches(block, regex)
|
||||
block = ""
|
||||
for b in blocks:
|
||||
block += "\n" + str(b)
|
||||
log('BLOCK ', n, '=', block)
|
||||
else:
|
||||
block = data
|
||||
if patron:
|
||||
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
|
||||
log('MATCHES =', matches)
|
||||
if (item.contentType == "tvshow" and (action != "findvideos" and action != "play")) \
|
||||
or (item.contentType == "episode" and action != "play") \
|
||||
or (item.contentType == "movie" and action != "play") :
|
||||
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
|
||||
# else: # Si perde item show :(
|
||||
# for it in itemlist:
|
||||
# it.infoLabels = item.infoLabels
|
||||
|
||||
if debug:
|
||||
regexDbg(item, patron, headers, block)
|
||||
if 'itemlistHook' in args:
|
||||
itemlist = args['itemlistHook'](itemlist)
|
||||
|
||||
known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
|
||||
'rating', 'type', 'lang'] # by greko aggiunto episode
|
||||
lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
|
||||
|
||||
pag = item.page if item.page else 1 # pagination
|
||||
if patronNext:
|
||||
nextPage(itemlist, item, data, patronNext, 2)
|
||||
|
||||
for i, match in enumerate(matches):
|
||||
if pagination and (pag - 1) * pagination > i: continue # pagination
|
||||
if pagination and i >= pag * pagination: break # pagination
|
||||
listGroups = match.keys()
|
||||
match = match.values()
|
||||
# next page for pagination
|
||||
if pagination and len(matches) >= pag * pagination:
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
action = item.action,
|
||||
contentType=item.contentType,
|
||||
title=typo(config.get_localized_string(30992), 'color kod bold'),
|
||||
url=item.url,
|
||||
args=item.args,
|
||||
page=pag + 1,
|
||||
thumbnail=thumb()))
|
||||
|
||||
if len(listGroups) > len(match): # to fix a bug
|
||||
match = list(match)
|
||||
match.extend([''] * (len(listGroups) - len(match)))
|
||||
if anime:
|
||||
from specials import autorenumber
|
||||
if inspect.stack()[1][3] == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
|
||||
else: autorenumber.renumber(itemlist)
|
||||
|
||||
scraped = {}
|
||||
for kk in known_keys:
|
||||
val = match[listGroups.index(kk)] if kk in listGroups else ''
|
||||
if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
|
||||
val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
|
||||
scraped[kk] = val
|
||||
if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
|
||||
item.fulltitle = item.infoLabels["title"]
|
||||
videolibrary(itemlist, item)
|
||||
|
||||
if scraped['title']:
|
||||
title = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title'])
|
||||
.replace('"',"'").replace('×', 'x').replace('–','-')).strip() # fix by greko da " a '
|
||||
else:
|
||||
title = ''
|
||||
|
||||
plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))
|
||||
if 'patronMenu' in args:
|
||||
itemlist = thumb(itemlist, genre=True)
|
||||
|
||||
longtitle = typo(title, 'bold')
|
||||
if scraped['quality']: longtitle = longtitle + typo(scraped['quality'], '_ [] color kod')
|
||||
if scraped['episode']:
|
||||
scraped['episode'] = re.sub(r'\s-\s|-|x|–', 'x', scraped['episode'])
|
||||
longtitle = typo(scraped['episode'] + ' - ', 'bold') + longtitle
|
||||
if scraped['title2']:
|
||||
title2 = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title2'])
|
||||
.replace('"', "'").replace('×', 'x').replace('–','-')).strip()
|
||||
longtitle = longtitle + typo(title2, 'bold _ -- _')
|
||||
|
||||
## Aggiunto/modificato per gestire i siti che hanno i video
|
||||
## in ita e subita delle serie tv nella stessa pagina
|
||||
if scraped['lang']:
|
||||
if 'sub' in scraped['lang'].lower():
|
||||
lang = 'Sub-ITA'
|
||||
else:
|
||||
lang = 'ITA'
|
||||
if lang != '':
|
||||
longtitle += typo(lang, '_ [] color kod')
|
||||
|
||||
# if title is set, probably this is a list of episodes or video sources
|
||||
if item.infoLabels["title"] or item.fulltitle:
|
||||
infolabels = item.infoLabels
|
||||
else:
|
||||
infolabels = {}
|
||||
if scraped['year']:
|
||||
infolabels['year'] = scraped['year']
|
||||
if scraped["plot"]:
|
||||
infolabels['plot'] = plot
|
||||
if scraped['duration']:
|
||||
matches = scrapertoolsV2.find_multiple_matches(scraped['duration'],
|
||||
r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
|
||||
for h, m in matches:
|
||||
scraped['duration'] = int(h) * 60 + int(m)
|
||||
if not matches:
|
||||
scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
|
||||
infolabels['duration'] = int(scraped['duration']) * 60
|
||||
if scraped['genere']:
|
||||
genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
|
||||
infolabels['genere'] = ", ".join(genres)
|
||||
if scraped["rating"]:
|
||||
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
|
||||
|
||||
if typeContentDict:
|
||||
for name, variants in typeContentDict.items():
|
||||
if scraped['type'] in variants:
|
||||
item.contentType = name
|
||||
if typeActionDict:
|
||||
for name, variants in typeActionDict.items():
|
||||
if scraped['type'] in variants:
|
||||
action = name
|
||||
|
||||
if scraped["title"]:
|
||||
if scraped["title"] not in blacklist:
|
||||
it = Item(
|
||||
channel=item.channel,
|
||||
action=action,
|
||||
contentType= 'episode' if (action == 'findvideos' and item.contentType == 'tvshow') else item.contentType,
|
||||
title=longtitle,
|
||||
fulltitle=item.fulltitle if (action == 'findvideos' and item.contentType != 'movie') else title,
|
||||
show=item.show if (action == 'findvideos' and item.contentType != 'movie') else title,
|
||||
quality=scraped["quality"],
|
||||
url=scraped["url"],
|
||||
infoLabels=infolabels,
|
||||
thumbnail=scraped["thumb"],
|
||||
args=item.args,
|
||||
contentSerieName = title if (action == 'episodios' and item.contentType != 'movie') else ''
|
||||
)
|
||||
|
||||
for lg in list(set(listGroups).difference(known_keys)):
|
||||
it.__setattr__(lg, match[listGroups.index(lg)])
|
||||
|
||||
if 'itemHook' in args:
|
||||
it = args['itemHook'](it)
|
||||
itemlist.append(it)
|
||||
checkHost(item, itemlist)
|
||||
|
||||
if (item.contentType == "tvshow" and (action != "findvideos" and action != "play")) \
|
||||
or (item.contentType == "episode" and action != "play") \
|
||||
or (item.contentType == "movie" and action != "play") :
|
||||
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
|
||||
# else: # Si perde item show :(
|
||||
# for it in itemlist:
|
||||
# it.infoLabels = item.infoLabels
|
||||
|
||||
if 'itemlistHook' in args:
|
||||
itemlist = args['itemlistHook'](itemlist)
|
||||
|
||||
if patronNext:
|
||||
nextPage(itemlist, item, data, patronNext, 2)
|
||||
|
||||
# next page for pagination
|
||||
if pagination and len(matches) >= pag * pagination:
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
action = item.action,
|
||||
contentType=item.contentType,
|
||||
title=typo(config.get_localized_string(30992), 'color kod bold'),
|
||||
url=item.url,
|
||||
args=item.args,
|
||||
page=pag + 1,
|
||||
thumbnail=thumb()))
|
||||
|
||||
if anime:
|
||||
from specials import autorenumber
|
||||
if inspect.stack()[1][3] == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
|
||||
else: autorenumber.renumber(itemlist)
|
||||
|
||||
if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
|
||||
item.fulltitle = item.infoLabels["title"]
|
||||
videolibrary(itemlist, item)
|
||||
|
||||
if 'patronMenu' in args:
|
||||
itemlist = thumb(itemlist, genre=True)
|
||||
|
||||
if 'fullItemlistHook' in args:
|
||||
itemlist = args['fullItemlistHook'](itemlist)
|
||||
if 'fullItemlistHook' in args:
|
||||
itemlist = args['fullItemlistHook'](itemlist)
|
||||
|
||||
return itemlist
|
||||
|
||||
@@ -604,7 +622,7 @@ def menu(func):
|
||||
args=var[2] if len(var) > 2 else '',
|
||||
contentType= var[3] if len(var) > 3 else 'movie',)
|
||||
# add search menu for category
|
||||
if 'search' not in args: menuItem(itemlist, filename, 'Cerca ' + title + '… submenu bold', 'search', host, args=name)
|
||||
if 'search' not in args: menuItem(itemlist, filename, 'Cerca ' + title + '… submenu bold', 'search', host + url, args=name)
|
||||
|
||||
# Make EXTRA MENU (on bottom)
|
||||
for name, var in args.items():
|
||||
|
||||
Reference in New Issue
Block a user