changes to support
This commit is contained in:
@@ -120,37 +120,10 @@ def video(item):
|
||||
logger.info('[filmsenzalimiti.py] video')
|
||||
itemlist = []
|
||||
|
||||
data = httptools.downloadpage(item.url).data.replace('\t','').replace('\n','')
|
||||
logger.info('[filmsenzalimiti.py] video' +data)
|
||||
|
||||
patron = '<div class="col-mt-5 postsh">.*?<a href="([^"]+)" title="([^"]+)">.*?<span class="rating-number">(.*?)<.*?<img src="([^"]+)"'
|
||||
patronNext = '<a href="([^"]+)"><i class="glyphicon glyphicon-chevron-right"'
|
||||
|
||||
matches = re.compile(patron, re.DOTALL).findall(data)
|
||||
|
||||
for scrapedurl, scrapedtitle, scrapedrating, scrapedthumbnail in matches:
|
||||
scrapedthumbnail = httptools.get_url_headers(scrapedthumbnail)
|
||||
scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle).strip()
|
||||
scrapedrating = scrapertools.decodeHtmlentities(scrapedrating)
|
||||
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
action='findvideos',
|
||||
title=scrapedtitle + ' (' + scrapedrating + ')',
|
||||
fulltitle=scrapedtitle,
|
||||
url=scrapedurl,
|
||||
show=scrapedtitle,
|
||||
contentType=item.contentType,
|
||||
thumbnail=scrapedthumbnail), tipo='movie')
|
||||
|
||||
patron = '<a href="([^"]+)"><i class="glyphicon glyphicon-chevron-right"'
|
||||
next_page = scrapertools.find_single_match(data, patron)
|
||||
if next_page != '':
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
action='video',
|
||||
title='[COLOR lightgreen]' + config.get_localized_string(30992) + '[/COLOR]',
|
||||
contentType=item.contentType,
|
||||
url=next_page))
|
||||
support.scrape(item, itemlist, patron, ['url', 'title', 'rating', 'thumb'], patronNext=patronNext)
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
@@ -84,13 +84,18 @@ def color(text, color):
|
||||
return "[COLOR " + color + "]" + text + "[/COLOR]"
|
||||
|
||||
|
||||
def scrape(item, itemlist, patron, listGroups, headers="", blacklist="", data="", patron_block="", patronNext="", action="findvideos", url_host=""):
|
||||
def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data="", patron_block="",
|
||||
patronNext="", action="findvideos", url_host=""):
|
||||
# patron: the patron to use for scraping page, all capturing group must match with listGroups
|
||||
# listGroups: a list containing the scraping info obtained by your patron, in order
|
||||
# accepted values are: url, title, thumb, quality, year, plot, duration, genre
|
||||
# accepted values are: url, title, thumb, quality, year, plot, duration, genre, rating
|
||||
|
||||
# header: values to pass to request header
|
||||
# blacklist: titles that you want to exclude(service articles for example)
|
||||
# data: if you want to pass data manually, maybe because you need some custom replacement
|
||||
# patron_block: patron to get parts of the page (to scrape with patron attribute),
|
||||
# if you need a "block inside another block" you can create a list, please note that all matches
|
||||
# will be packed as string
|
||||
# patronNext: patron for scraping next page link
|
||||
# action: if you want results perform an action different from "findvideos", useful when scraping film by genres
|
||||
# url_host: string to prepend to scrapedurl, useful when url don't contain host
|
||||
@@ -100,77 +105,92 @@ def scrape(item, itemlist, patron, listGroups, headers="", blacklist="", data=""
|
||||
# patron = 'blablabla'
|
||||
# headers = [['Referer', host]]
|
||||
# blacklist = 'Request a TV serie!'
|
||||
# support.scrape(item, itemlist, patron, ['thumb', 'quality', 'url', 'title', 'year', 'plot'], headers=headers, blacklist=blacklist)
|
||||
# return itemlist
|
||||
# return data for debugging purposes
|
||||
# return support.scrape(item, itemlist, patron, ['thumb', 'quality', 'url', 'title', 'year', 'plot'],
|
||||
# headers=headers, blacklist=blacklist)
|
||||
|
||||
itemlist = []
|
||||
|
||||
if not data:
|
||||
data = httptools.downloadpage(item.url, headers=headers).data.replace("'", '"')
|
||||
# replace all ' with ", so we don't need to worry about
|
||||
data = re.sub('\n|\t', '', data)
|
||||
# replace all ' with " and eliminate newline, so we don't need to worry about
|
||||
logger.info('DATA ='+data)
|
||||
|
||||
if patron_block:
|
||||
block = scrapertoolsV2.get_match(data, patron_block)
|
||||
else:
|
||||
block = data
|
||||
if patron_block:
|
||||
if type(patron_block) == str:
|
||||
patron_block = [patron_block]
|
||||
|
||||
matches = scrapertoolsV2.find_multiple_matches(block, patron)
|
||||
for n, regex in enumerate(patron_block):
|
||||
blocks = scrapertoolsV2.find_multiple_matches(data, regex)
|
||||
data = str(blocks)
|
||||
logger.info('BLOCK '+str(n)+'=' + data)
|
||||
if patron and listGroups:
|
||||
matches = scrapertoolsV2.find_multiple_matches(data, patron)
|
||||
logger.info('MATCHES ='+str(matches))
|
||||
|
||||
for match in matches:
|
||||
scrapedurl = url_host+match[listGroups.index('url')] if 'url' in listGroups else ''
|
||||
scrapedtitle = match[listGroups.index('title')] if 'title' in listGroups else ''
|
||||
scrapedthumb = match[listGroups.index('thumb')] if 'thumb' in listGroups else ''
|
||||
scrapedquality = match[listGroups.index('quality')] if 'quality' in listGroups else ''
|
||||
scrapedyear = match[listGroups.index('year')] if 'year' in listGroups else ''
|
||||
scrapedplot = match[listGroups.index('plot')] if 'plot' in listGroups else ''
|
||||
scrapedduration = match[listGroups.index('duration')] if 'duration' in listGroups else ''
|
||||
scrapedgenre = match[listGroups.index('genre')] if 'genre' in listGroups else ''
|
||||
for match in matches:
|
||||
if len(listGroups) > len(match): # to fix a bug
|
||||
match = list(match)
|
||||
match.extend([''] * (len(listGroups)-len(match)))
|
||||
|
||||
title = scrapertoolsV2.decodeHtmlentities(scrapedtitle)
|
||||
plot = scrapertoolsV2.decodeHtmlentities(scrapedplot)
|
||||
if scrapedquality:
|
||||
longtitle = '[B]' + title + '[/B] [COLOR blue][' + scrapedquality + '][/COLOR]'
|
||||
else:
|
||||
longtitle = '[B]' + title + '[/B]'
|
||||
scrapedurl = url_host+match[listGroups.index('url')] if 'url' in listGroups else ''
|
||||
scrapedtitle = match[listGroups.index('title')] if 'title' in listGroups else ''
|
||||
scrapedthumb = match[listGroups.index('thumb')] if 'thumb' in listGroups else ''
|
||||
scrapedquality = match[listGroups.index('quality')] if 'quality' in listGroups else ''
|
||||
scrapedyear = match[listGroups.index('year')] if 'year' in listGroups else ''
|
||||
scrapedplot = match[listGroups.index('plot')] if 'plot' in listGroups else ''
|
||||
scrapedduration = match[listGroups.index('duration')] if 'duration' in listGroups else ''
|
||||
scrapedgenre = match[listGroups.index('genre')] if 'genre' in listGroups else ''
|
||||
scrapedrating = match[listGroups.index('rating')] if 'rating' in listGroups else ''
|
||||
|
||||
infolabels = {}
|
||||
if scrapedyear:
|
||||
infolabels['year'] = scrapedyear
|
||||
if scrapedplot:
|
||||
infolabels['plot'] = plot
|
||||
if scrapedduration:
|
||||
infolabels['duration'] = scrapedduration
|
||||
if scrapedgenre:
|
||||
infolabels['genre'] = scrapertoolsV2.find_multiple_matches(scrapedgenre, '(?:<[^<]+?>)?([^<>]+)') # delete all html tags and match text
|
||||
if not scrapedtitle in blacklist:
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
action=action,
|
||||
contentType=item.contentType,
|
||||
title=longtitle,
|
||||
fulltitle=title,
|
||||
show=title,
|
||||
quality=scrapedquality,
|
||||
url=scrapedurl,
|
||||
infoLabels=infolabels,
|
||||
thumbnail=scrapedthumb
|
||||
)
|
||||
)
|
||||
title = scrapertoolsV2.decodeHtmlentities(scrapedtitle)
|
||||
plot = scrapertoolsV2.decodeHtmlentities(scrapedplot)
|
||||
if scrapedquality:
|
||||
longtitle = '[B]' + title + '[/B] [COLOR blue][' + scrapedquality + '][/COLOR]'
|
||||
else:
|
||||
longtitle = '[B]' + title + '[/B]'
|
||||
|
||||
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
|
||||
infolabels = {}
|
||||
if scrapedyear:
|
||||
infolabels['year'] = scrapedyear
|
||||
if scrapedplot:
|
||||
infolabels['plot'] = plot
|
||||
if scrapedduration:
|
||||
infolabels['duration'] = scrapedduration
|
||||
if scrapedgenre:
|
||||
infolabels['genre'] = scrapertoolsV2.find_multiple_matches(scrapedgenre, '(?:<[^<]+?>)?([^<>]+)') # delete all html tags and match text
|
||||
if scrapedrating:
|
||||
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scrapedrating)
|
||||
if not scrapedtitle in blacklist:
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
action=action,
|
||||
contentType=item.contentType,
|
||||
title=longtitle,
|
||||
fulltitle=title,
|
||||
show=title,
|
||||
quality=scrapedquality,
|
||||
url=scrapedurl,
|
||||
infoLabels=infolabels,
|
||||
thumbnail=scrapedthumb
|
||||
)
|
||||
)
|
||||
|
||||
if patronNext:
|
||||
next_page = scrapertoolsV2.find_single_match(data, patronNext)
|
||||
logger.info('NEXT ' + next_page)
|
||||
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
|
||||
|
||||
if next_page != "":
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
action="peliculas",
|
||||
contentType=item.contentType,
|
||||
title="[COLOR blue]" + config.get_localized_string(30992) + " >[/COLOR]",
|
||||
url=next_page))
|
||||
if patronNext:
|
||||
next_page = scrapertoolsV2.find_single_match(data, patronNext)
|
||||
logger.info('NEXT ' + next_page)
|
||||
|
||||
return block
|
||||
if next_page != "":
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
action="peliculas",
|
||||
contentType=item.contentType,
|
||||
title="[COLOR blue]" + config.get_localized_string(30992) + " >[/COLOR]",
|
||||
url=next_page))
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
def dooplay_get_links(item, host):
|
||||
@@ -201,16 +221,16 @@ def dooplay_get_links(item, host):
|
||||
return ret
|
||||
|
||||
|
||||
def dooplay_films(item, itemlist, blacklist=""):
|
||||
def dooplay_films(item, blacklist=""):
|
||||
patron = '<article id="post-[0-9]+" class="item movies">.*?<img src="([^"]+)".*?<span class="quality">([^<>]+).*?<a href="([^"]+)">([^<>]+)</a></h3> (?:<span>([0-9]{4})</span>)?.*?(?:<span>([0-9]+) min</span>)?.*?(?:<div class="texto">([^<>]+).*?)?(?:genres">(.*?)</div>)?'
|
||||
patronNext = '<a class="arrow_pag" href="([^"]+)"><i id="nextpagination"'
|
||||
scrape(item, itemlist, patron, ['thumb', 'quality', 'url', 'title', 'year', 'duration', 'plot', 'genre'], blacklist=blacklist, patronNext=patronNext)
|
||||
return scrape(item, patron, ['thumb', 'quality', 'url', 'title', 'year', 'duration', 'plot', 'genre'], blacklist=blacklist, patronNext=patronNext)
|
||||
|
||||
|
||||
def dooplay_search(item, itemlist, blacklist=""):
|
||||
def dooplay_search(item, blacklist=""):
|
||||
patron = '<div class="result-item">.*?<img src="([^"]+)".*?<span class="movies">([^<>]+).*?<a href="([^"]+)">([^<>]+)</a>.*?<span class="year">([0-9]{4}).*?<div class="contenido"><p>([^<>]+)'
|
||||
patronNext = '<a class="arrow_pag" href="([^"]+)"><i id="nextpagination"'
|
||||
scrape(item, itemlist, patron, ['thumb', 'quality', 'url', 'title', 'year', 'plot'], blacklist=blacklist, patronNext=patronNext)
|
||||
return scrape(item, patron, ['thumb', 'quality', 'url', 'title', 'year', 'plot'], blacklist=blacklist, patronNext=patronNext)
|
||||
|
||||
|
||||
def swzz_get_url(item):
|
||||
|
||||
@@ -84,21 +84,13 @@ def search(item, text):
|
||||
logger.info("[vedohd.py] " + item.url + " search " + text)
|
||||
item.url = item.url + "/?s=" + text
|
||||
|
||||
itemlist = []
|
||||
|
||||
support.dooplay_search(item, itemlist, blacklist)
|
||||
|
||||
|
||||
return itemlist
|
||||
return support.dooplay_search(item, blacklist)
|
||||
|
||||
|
||||
def peliculas(item):
|
||||
logger.info("[vedohd.py] video")
|
||||
itemlist = []
|
||||
|
||||
support.dooplay_films(item, itemlist, blacklist)
|
||||
|
||||
return itemlist
|
||||
return support.dooplay_films(item, blacklist)
|
||||
|
||||
|
||||
def findvideos(item):
|
||||
@@ -130,17 +122,13 @@ def findvideos(item):
|
||||
|
||||
|
||||
def generos(item):
|
||||
itemlist = []
|
||||
patron = '<a href="([^"#]+)">([a-zA-Z]+)'
|
||||
support.scrape(item, itemlist, patron, ['url', 'title'], patron_block='<a href="#">Genere</a><ul class="sub-menu">.*?</ul>', action='peliculas', url_host=host)
|
||||
return itemlist
|
||||
return support.scrape(item, patron, ['url', 'title'], patron_block='<a href="#">Genere</a><ul class="sub-menu">.*?</ul>', action='peliculas', url_host=host)
|
||||
|
||||
|
||||
def year(item):
|
||||
itemlist = []
|
||||
patron = '<a href="([^"#]+)">([a-zA-Z]+)'
|
||||
support.scrape(item, itemlist, patron, ['url', 'title'], patron_block='<a href="#">Anno</a><ul class="sub-menu">.*?</ul>', action='peliculas', url_host=host)
|
||||
return itemlist
|
||||
return support.scrape(item, patron, ['url', 'title'], patron_block='<a href="#">Anno</a><ul class="sub-menu">.*?</ul>', action='peliculas', url_host=host)
|
||||
|
||||
|
||||
def play(item):
|
||||
|
||||
Reference in New Issue
Block a user