migliorie a support

This commit is contained in:
mac12m99
2019-08-07 16:47:07 +02:00
parent b14673e4eb
commit 28474b0249
10 changed files with 230 additions and 282 deletions
+183 -165
View File
@@ -130,7 +130,7 @@ def regexDbg(item, patron, headers, data=''):
'regex': patron,
'flags': 'gm',
'testString': html,
'delimiter': '"',
'delimiter': '"""',
'flavor': 'python'
}
r = urllib2.Request(url + '/api/regex', json.dumps(data), headers=headers)
@@ -141,15 +141,138 @@ def regexDbg(item, patron, headers, data=''):
def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="",
patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}):
m = re.search(r'\((?!\?)', patron)
m = re.search(r'(?<!\\|\[)\((?!\?)', patron)
n = 0
while m:
patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():]
m = re.search(r'\((?!\?)', patron)
m = re.search(r'(?<!\\|\[)\((?!\?)', patron)
n += 1
regexDbg(item, patron, headers)
def scrapeLang(scraped, lang, longtitle):
## Aggiunto/modificato per gestire i siti che hanno i video
## in ita e subita delle serie tv nella stessa pagina
if scraped['lang']:
if 'sub' in scraped['lang'].lower():
lang = 'Sub-ITA'
else:
lang = 'ITA'
if lang != '':
longtitle += typo(lang, '_ [] color kod')
return lang, longtitle
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, pag):
itemlist = []
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
log('MATCHES =', matches)
if debug:
regexDbg(item, patron, headers, block)
known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
'rating', 'type', 'lang'] # by greko aggiunto episode
lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
for i, match in enumerate(matches):
if pagination and (pag - 1) * pagination > i: continue # pagination
if pagination and i >= pag * pagination: break # pagination
listGroups = match.keys()
match = match.values()
if len(listGroups) > len(match): # to fix a bug
match = list(match)
match.extend([''] * (len(listGroups) - len(match)))
scraped = {}
for kk in known_keys:
val = match[listGroups.index(kk)] if kk in listGroups else ''
if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
scraped[kk] = val
if scraped['title']:
title = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title'])
.replace('"', "'").replace('×', 'x').replace('',
'-')).strip() # fix by greko da " a '
else:
title = ''
plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))
longtitle = typo(title, 'bold')
if scraped['quality']: longtitle = longtitle + typo(scraped['quality'], '_ [] color kod')
if scraped['episode']:
scraped['episode'] = re.sub(r'\s-\s|-|x|&#8211|&#215;', 'x', scraped['episode'])
longtitle = typo(scraped['episode'] + ' - ', 'bold') + longtitle
if scraped['title2']:
title2 = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title2'])
.replace('"', "'").replace('×', 'x').replace('', '-')).strip()
longtitle = longtitle + typo(title2, 'bold _ -- _')
lang, longitle = scrapeLang(scraped, lang, longtitle)
# if title is set, probably this is a list of episodes or video sources
if item.infoLabels["title"]:
infolabels = item.infoLabels
else:
infolabels = {}
if scraped['year']:
infolabels['year'] = scraped['year']
if scraped["plot"]:
infolabels['plot'] = plot
if scraped['duration']:
matches = scrapertoolsV2.find_multiple_matches(scraped['duration'],
r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
for h, m in matches:
scraped['duration'] = int(h) * 60 + int(m)
if not matches:
scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
infolabels['duration'] = int(scraped['duration']) * 60
if scraped['genere']:
genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
infolabels['genere'] = ", ".join(genres)
if scraped["rating"]:
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
if typeContentDict:
for name, variants in typeContentDict.items():
if scraped['type'] in variants:
item.contentType = name
if typeActionDict:
for name, variants in typeActionDict.items():
if scraped['type'] in variants:
action = name
if (scraped["title"] and scraped["title"] not in blacklist) or longtitle:
it = Item(
channel=item.channel,
action=action,
contentType='episode' if (
action == 'findvideos' and item.contentType == 'tvshow') else item.contentType,
title=longtitle,
fulltitle=item.fulltitle if (action == 'findvideos' and item.contentType != 'movie') else title,
show=item.show if (action == 'findvideos' and item.contentType != 'movie') else title,
quality=scraped["quality"],
url=scraped["url"],
infoLabels=infolabels,
thumbnail=scraped["thumb"],
args=item.args,
contentSerieName=title if (action == 'episodios' and item.contentType != 'movie') else '',
contentLanguage=lang
)
for lg in list(set(listGroups).difference(known_keys)):
it.__setattr__(lg, match[listGroups.index(lg)])
if 'itemHook' in args:
it = args['itemHook'](it)
itemlist.append(it)
return itemlist, matches
def scrape(func):
# args is a dict containing the foolowing keys:
@@ -188,7 +311,7 @@ def scrape(func):
action = args['action'] if 'action' in args else 'findvideos'
anime = args['anime'] if 'anime' in args else ''
addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True
blacklist = args['blacklist'] if 'blacklist' in args else ''
blacklist = args['blacklist'] if 'blacklist' in args else []
data = args['data'] if 'data' in args else ''
patron = args['patron'] if 'patron' in args else args['patronMenu'] if 'patronMenu' in args else ''
headers = args['headers'] if 'headers' in args else func.__globals__['headers']
@@ -200,6 +323,9 @@ def scrape(func):
if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20
else: pagination = ''
pag = item.page if item.page else 1 # pagination
matches = []
log('PATRON= ', patron)
if not data:
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
@@ -207,175 +333,67 @@ def scrape(func):
# replace all ' with " and eliminate newline, so we don't need to worry about
log('DATA =', data)
block = data
if patronBlock:
blocks = scrapertoolsV2.find_multiple_matches_groups(data, patronBlock)
block = ""
for bl in blocks:
blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug,
typeContentDict, typeActionDict, blacklist, pag)
for it in blockItemlist:
if 'lang' in bl:
it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title)
if 'quality' in bl and bl['quality']:
it.quality = bl['quality']
it.title = it.title + typo(bl['quality'], '_ [] color kod')
log('BLOCK ', '=', block)
itemlist.extend(blockItemlist)
matches.extend(blockMatches)
elif patron:
itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict,
typeActionDict, blacklist, pag)
if patronBlock:
if type(patronBlock) == str:
patronBlock = [patronBlock]
checkHost(item, itemlist)
for n, regex in enumerate(patronBlock):
blocks = scrapertoolsV2.find_multiple_matches(block, regex)
block = ""
for b in blocks:
block += "\n" + str(b)
log('BLOCK ', n, '=', block)
else:
block = data
if patron:
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
log('MATCHES =', matches)
if (item.contentType == "tvshow" and (action != "findvideos" and action != "play")) \
or (item.contentType == "episode" and action != "play") \
or (item.contentType == "movie" and action != "play") :
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
# else: # Si perde item show :(
# for it in itemlist:
# it.infoLabels = item.infoLabels
if debug:
regexDbg(item, patron, headers, block)
if 'itemlistHook' in args:
itemlist = args['itemlistHook'](itemlist)
known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
'rating', 'type', 'lang'] # by greko aggiunto episode
lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
pag = item.page if item.page else 1 # pagination
if patronNext:
nextPage(itemlist, item, data, patronNext, 2)
for i, match in enumerate(matches):
if pagination and (pag - 1) * pagination > i: continue # pagination
if pagination and i >= pag * pagination: break # pagination
listGroups = match.keys()
match = match.values()
# next page for pagination
if pagination and len(matches) >= pag * pagination:
itemlist.append(
Item(channel=item.channel,
action = item.action,
contentType=item.contentType,
title=typo(config.get_localized_string(30992), 'color kod bold'),
url=item.url,
args=item.args,
page=pag + 1,
thumbnail=thumb()))
if len(listGroups) > len(match): # to fix a bug
match = list(match)
match.extend([''] * (len(listGroups) - len(match)))
if anime:
from specials import autorenumber
if inspect.stack()[1][3] == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
else: autorenumber.renumber(itemlist)
scraped = {}
for kk in known_keys:
val = match[listGroups.index(kk)] if kk in listGroups else ''
if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
scraped[kk] = val
if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
item.fulltitle = item.infoLabels["title"]
videolibrary(itemlist, item)
if scraped['title']:
title = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title'])
.replace('"',"'").replace('×', 'x').replace('','-')).strip() # fix by greko da " a '
else:
title = ''
plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))
if 'patronMenu' in args:
itemlist = thumb(itemlist, genre=True)
longtitle = typo(title, 'bold')
if scraped['quality']: longtitle = longtitle + typo(scraped['quality'], '_ [] color kod')
if scraped['episode']:
scraped['episode'] = re.sub(r'\s-\s|-|x|&#8211', 'x', scraped['episode'])
longtitle = typo(scraped['episode'] + ' - ', 'bold') + longtitle
if scraped['title2']:
title2 = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title2'])
.replace('"', "'").replace('×', 'x').replace('','-')).strip()
longtitle = longtitle + typo(title2, 'bold _ -- _')
## Aggiunto/modificato per gestire i siti che hanno i video
## in ita e subita delle serie tv nella stessa pagina
if scraped['lang']:
if 'sub' in scraped['lang'].lower():
lang = 'Sub-ITA'
else:
lang = 'ITA'
if lang != '':
longtitle += typo(lang, '_ [] color kod')
# if title is set, probably this is a list of episodes or video sources
if item.infoLabels["title"] or item.fulltitle:
infolabels = item.infoLabels
else:
infolabels = {}
if scraped['year']:
infolabels['year'] = scraped['year']
if scraped["plot"]:
infolabels['plot'] = plot
if scraped['duration']:
matches = scrapertoolsV2.find_multiple_matches(scraped['duration'],
r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
for h, m in matches:
scraped['duration'] = int(h) * 60 + int(m)
if not matches:
scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
infolabels['duration'] = int(scraped['duration']) * 60
if scraped['genere']:
genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
infolabels['genere'] = ", ".join(genres)
if scraped["rating"]:
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
if typeContentDict:
for name, variants in typeContentDict.items():
if scraped['type'] in variants:
item.contentType = name
if typeActionDict:
for name, variants in typeActionDict.items():
if scraped['type'] in variants:
action = name
if scraped["title"]:
if scraped["title"] not in blacklist:
it = Item(
channel=item.channel,
action=action,
contentType= 'episode' if (action == 'findvideos' and item.contentType == 'tvshow') else item.contentType,
title=longtitle,
fulltitle=item.fulltitle if (action == 'findvideos' and item.contentType != 'movie') else title,
show=item.show if (action == 'findvideos' and item.contentType != 'movie') else title,
quality=scraped["quality"],
url=scraped["url"],
infoLabels=infolabels,
thumbnail=scraped["thumb"],
args=item.args,
contentSerieName = title if (action == 'episodios' and item.contentType != 'movie') else ''
)
for lg in list(set(listGroups).difference(known_keys)):
it.__setattr__(lg, match[listGroups.index(lg)])
if 'itemHook' in args:
it = args['itemHook'](it)
itemlist.append(it)
checkHost(item, itemlist)
if (item.contentType == "tvshow" and (action != "findvideos" and action != "play")) \
or (item.contentType == "episode" and action != "play") \
or (item.contentType == "movie" and action != "play") :
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
# else: # Si perde item show :(
# for it in itemlist:
# it.infoLabels = item.infoLabels
if 'itemlistHook' in args:
itemlist = args['itemlistHook'](itemlist)
if patronNext:
nextPage(itemlist, item, data, patronNext, 2)
# next page for pagination
if pagination and len(matches) >= pag * pagination:
itemlist.append(
Item(channel=item.channel,
action = item.action,
contentType=item.contentType,
title=typo(config.get_localized_string(30992), 'color kod bold'),
url=item.url,
args=item.args,
page=pag + 1,
thumbnail=thumb()))
if anime:
from specials import autorenumber
if inspect.stack()[1][3] == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
else: autorenumber.renumber(itemlist)
if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
item.fulltitle = item.infoLabels["title"]
videolibrary(itemlist, item)
if 'patronMenu' in args:
itemlist = thumb(itemlist, genre=True)
if 'fullItemlistHook' in args:
itemlist = args['fullItemlistHook'](itemlist)
if 'fullItemlistHook' in args:
itemlist = args['fullItemlistHook'](itemlist)
return itemlist
@@ -604,7 +622,7 @@ def menu(func):
args=var[2] if len(var) > 2 else '',
contentType= var[3] if len(var) > 3 else 'movie',)
# add search menu for category
if 'search' not in args: menuItem(itemlist, filename, 'Cerca ' + title + '… submenu bold', 'search', host, args=name)
if 'search' not in args: menuItem(itemlist, filename, 'Cerca ' + title + '… submenu bold', 'search', host + url, args=name)
# Make EXTRA MENU (on bottom)
for name, var in args.items():