Merge pull request #10 from thepasto/master

Support scrape - improvements
This commit is contained in:
cttynul
2019-05-03 17:24:03 +02:00
committed by GitHub
2 changed files with 221 additions and 101 deletions

View File

@@ -4,77 +4,187 @@
# ------------------------------------------------------------
import re
from channels import filtertools
from core import scrapertools, servertools, httptools
from channels import filtertools, support, autoplay
from core import scrapertools, servertools, httptools, scrapertoolsV2
from core.item import Item
from platformcode import config
from core import tmdb
# Base URL of the channel. NOTE: the old '.info' domain assignment that used to
# precede this one was dead code (immediately overwritten) and has been removed;
# '.icu' is the current domain.
host = 'https://cinemastreaming.icu'

# Language table used by filtertools; only Italian content is listed.
IDIOMAS = {'Italiano': 'IT'}
list_language = IDIOMAS.values()

# Hosters and quality labels advertised to autoplay/filtertools.
list_servers = ['openload', 'streamango']
list_quality = ['1080p', '1080p 3D', 'SD', 'CAM', 'default']

# Default request headers: the site expects a same-host Referer.
headers = [['Referer', host]]
def mainlist(item):
    """Build the channel's main menu.

    Returns a list of menu Items populated via support.menu().
    The pre-refactor block that rebuilt `itemlist` from scratch with a single
    hard-coded 'Film' Item (diff-merge residue) has been removed: it discarded
    every entry added by support.menu() below.
    """
    support.log()
    # Main menu
    itemlist = []
    support.menu(itemlist, 'Film bold', 'peliculas', host + '/film/')
    support.menu(itemlist, 'Per genere submenu', 'menu', host, args="Film per Genere")
    support.menu(itemlist, 'Anime bold', 'peliculas', host + '/category/anime/')
    support.menu(itemlist, 'Serie TV bold', 'peliculas', host + '/serie-tv/', contentType='episode')
    support.menu(itemlist, 'Ultime Uscite submenu', 'peliculas', host + "/stagioni/", "episode", args='latests')
    support.menu(itemlist, 'Ultimi Episodi submenu', 'peliculas_latest_ep', host + "/episodi/", "episode", args='lateste')
    support.menu(itemlist, '[COLOR blue]Cerca...[/COLOR]', 'search')
    return itemlist
def video(item):
    """Legacy lister: scrape the <main> block of a listing page into Items.

    Kept from the pre-`support.scrape` implementation; the new menu entries
    point at peliculas() instead. Indentation reconstructed — the pasted diff
    had lost it. NOTE(review): the original nesting of the append relative to
    the inner info-loop could not be recovered exactly; the append is placed
    in the outer loop with infoLabels pre-initialized, so entries without an
    info block still get appended with empty labels — confirm against the
    upstream file.
    """
    log()
    itemlist = []  # start with an empty list
    # Download the page
    data = httptools.downloadpage(item.url, headers=headers).data
    block = scrapertools.find_single_match(data, r'<main>(.*?)<\/main>')
    block = re.sub('\t|\n', '', block)
    patron = r'<article.*?class="TPost C">.*?<a href="([^"]+)">.*?src="([^"]+)".*?>.*?<h3 class="Title">([^<]+)<\/h3>(.*?)<\/article>'
    matches = re.compile(patron, re.DOTALL).findall(block)
    for scrapedurl, scrapedthumb, scrapedtitle, scrapedinfo in matches:
        log('Info Block', scrapedinfo)
        # Per-card metadata: year, vote, plot, genres, director, cast
        patron = r'<span class="Year">(.*?)<\/span>.*?<span class="Vote.*?">(.*?)<\/span>.*?<div class="Description"><p>(.*?)<\/p>.*?<p class="Genre.*?">(.*?)<\/p><p class="Director.*?">.*?<a.*?>(.*?)<\/a>.*?<p class="Actors.*?">(.*?)<\/p>'
        info = re.compile(patron, re.DOTALL).findall(scrapedinfo)
        infoLabels = {}
        for year, rating, plot, genre, director, cast in info:
            # Genres and cast are lists of <a> anchors inside their paragraph
            genre = scrapertools.find_multiple_matches(genre, r'<a.*?>(.*?)<\/a>')
            cast = scrapertools.find_multiple_matches(cast, r'<a.*?>(.*?)<\/a>')
            infoLabels['Year'] = year
            infoLabels['Rating'] = rating
            infoLabels['Plot'] = plot
            infoLabels['Genre'] = genre
            infoLabels['Director'] = director
            infoLabels['Cast'] = cast
        itemlist.append(
            Item(channel=item.channel,
                 action="findvideos",
                 contentType=item.contentType,
                 title=scrapedtitle,
                 fulltitle=scrapedtitle,
                 url=scrapedurl,
                 thumbnail=scrapedthumb,
                 infoLabels=infoLabels,
                 show=scrapedtitle))
    autoplay.init(item.channel, list_servers, list_quality)
    autoplay.show_option(item.channel, itemlist)
    return itemlist
def log(stringa1="", stringa2=""):
    """Write an info-level log line tagged with this file's basename and the
    name of the calling function (frame one level up the stack)."""
    import inspect
    import os
    from platformcode import logger
    caller_name = inspect.stack()[1][3]
    module_name = os.path.basename(__file__)
    logger.info("[" + module_name + "] - [" + caller_name + "] " + str(stringa1) + str(stringa2))
def peliculas(item):
    """List movies/series from a category page via support.scrape().

    The capture pattern is assembled incrementally: the base card pattern,
    then either a loose year capture (latest-seasons pages) or the full
    year/rating/duration captures, and finally a quality capture for movies.
    """
    support.log()
    groups = ["url", "thumb", "title", "year", "rating", "duration"]
    patron = r'<article.*?"TPost C".*?href="([^"]+)".*?img.*?src="([^"]+)".*?<h3.*?>([^<]+).*?Year">'
    if item.args == "latests":
        # "latest seasons" cards expose only a free-form year field
        patron += r'([^<]+)'
    else:
        patron += r'(\d{4}).*?AAIco-star.*?>([^<]+).*?AAIco-access_time">([^<]+).*?Qlty'
    patron_next = r'page-numbers current.*?href="([^"]+)"'
    if item.contentType == "movie":
        # Movie cards additionally expose a quality badge
        patron += r'\">([^<]+)'
        groups.append("quality")
    if item.contentType == "movie":
        next_action = "findvideos"
    else:
        next_action = "episodios"
    return support.scrape(item, patron, groups, patronNext=patron_next, action=next_action)
def peliculas_latest_ep(item):
    """List the 'latest episodes' page: one Item per episode card, plus a
    next-page entry appended by support.nextPage()."""
    regex = (r'<article.*?"TPost C".*?href="([^"]+)".*?img.*?src="([^"]+)"'
             r'.*?class="ClB">([^<]+)<\/span>([^<]+).*?<h3.*?>([^<]+)')
    page = httptools.downloadpage(item.url).data
    results = []
    for ep_url, ep_thumb, ep_num, ep_label, ep_title in re.findall(regex, page, re.DOTALL):
        results.append(
            Item(channel=item.channel,
                 action="findvideos",
                 contentType=item.contentType,
                 title="[B]" + ep_num + "[/B]" + ep_label + " - " + ep_title,
                 fulltitle=ep_label + " " + ep_title,
                 show=ep_label + " " + ep_title,
                 url=ep_url,
                 extra=item.extra,
                 # thumbnails come protocol-relative from the site
                 thumbnail="http:" + ep_thumb,
                 infoLabels=item.infoLabels
                 ))
    support.nextPage(results, item, page, r'page-numbers current.*?href="([^"]+)"')
    return results
def peliculas_menu(item):
    """Genre-submenu listing: identical to peliculas() except the trailing
    (next-page) entry is dropped."""
    full_listing = peliculas(item)
    return full_listing[:-1]
def episodios(item):
    """List the episodes of a series page, prefixing each title with its
    SxxExx tag derived from the episode URL."""
    regex = r'<td class="MvTbTtl"><a href="([^"]+)">(.*?)<\/a>.*?>\d{4}<'
    episodes = support.scrape(item, regex, ["url", "title", "year"])
    for ep in episodes:
        season_episode = scrapertools.get_season_and_episode(ep.url)
        ep.title = season_episode + " - " + ep.title
        ep.fulltitle = season_episode + " - " + ep.fulltitle
    return episodes
def menu(item):
    """Scrape the site's category submenu into a genre listing; 'Anime' is
    blacklisted because it has a dedicated main-menu entry."""
    submenu_block = r'<ul class="sub-menu">.*?</ul>'
    entry_regex = r'menu-category-list"><a href="([^"]+)">([^<]+)<'
    return support.scrape(item,
                          entry_regex,
                          ["url", "title"],
                          blacklist="Anime",
                          action="peliculas_menu",
                          patron_block=submenu_block)
def search(item, texto):
    """Global-search entry point: query the site and list the results.

    Any failure is logged and swallowed (returning []) so one broken channel
    does not abort KoD's multi-channel search. The Python-2-only
    `except Exception, e:` syntax has been replaced with the portable
    `except Exception as e:` (valid on Python 2.6+ and Python 3).
    """
    support.log("s=", texto)
    item.url = host + "/?s=" + texto
    try:
        return peliculas(item)
    # Keep the search going in case of error
    except Exception as e:
        import traceback
        traceback.print_stack()
        support.log(str(e))
        return []
def newest(categoria):
    """'Novelties' entry point: return the latest items for a category.

    Only the "series" category is handled; anything else yields [].
    Fixes applied: Python-2-only `except Exception, e:` replaced with the
    portable `as e` form, and the trailing next-page item is popped only when
    the list is non-empty (previously an IndexError on an empty result fell
    through to the except handler).
    """
    support.log("newest" + categoria)
    itemlist = []
    item = Item()
    try:
        if categoria == "series":
            item.url = host + "/episodi/"
            item.action = "peliculas"
            item.args = "lateste"
            item.contentType = "episode"
            itemlist = peliculas(item)
            # Drop the trailing "next page" pseudo-item, if present
            if itemlist and itemlist[-1].action == "peliculas":
                itemlist.pop()
    # Keep the search going in case of error
    except Exception as e:
        import traceback
        traceback.print_stack()
        support.log(str(e))
        return []
    return itemlist
def findvideos(item):
    """Resolve the player tabs of a title page into playable Items.

    Indentation reconstructed (the pasted diff had lost it). Each player tab
    carries a trembed= embed URL that is fetched once more to extract the
    final iframe src. NOTE(review): `btns[i]` assumes the tab list and the
    iframe list have equal length — confirm against the live page markup.
    """
    # On series pages the "quality" badge actually carries the show status;
    # in that case list episodes instead of video sources.
    if item.quality.lower() in ["ended", "canceled", "returning series"]:
        return episodios(item)
    itemlist = []
    data = scrapertoolsV2.decodeHtmlentities(httptools.downloadpage(item.url).data)
    # Player tab buttons: (server name, language/quality label)
    btns = re.compile(r'data-tplayernv="Opt.*?><span>([^<]+)</span><span>([^<]+)</span>', re.DOTALL).findall(data)
    # Embed URLs, one per tab
    matches = re.compile(r'<iframe.*?src="([^"]+trembed=[^"]+)', re.DOTALL).findall(data)
    for i, scrapedurl in enumerate(matches):
        scrapedurl = scrapertoolsV2.decodeHtmlentities(scrapedurl)
        patron = r'<iframe.*?src="([^"]+)"'
        # The embed page wraps the real hoster URL in another iframe
        link_data = httptools.downloadpage(scrapedurl).data
        url = scrapertoolsV2.find_single_match(link_data, patron)
        itemlist.append(
            Item(channel=item.channel,
                 action="play",
                 contentType=item.contentType,
                 title="[B]" + btns[i][0] + "[/B] - " + btns[i][1],
                 fulltitle=btns[i][0] + " " + btns[i][1],
                 show=btns[i][0] + " " + btns[i][1],
                 url=url,
                 extra=item.extra,
                 infoLabels=item.infoLabels,
                 server=btns[i][0],
                 contentQuality=btns[i][1].replace('Italiano - ', ''),
                 ))
    if item.contentType == "movie":
        support.videolibrary(itemlist, item)
    autoplay.start(itemlist, item)
    return itemlist

View File

@@ -135,25 +135,24 @@ def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data=""
matches = scrapertoolsV2.find_multiple_matches(block, patron)
log('MATCHES =', matches)
known_keys = ['url', 'title', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating']
for match in matches:
if len(listGroups) > len(match): # to fix a bug
match = list(match)
match.extend([''] * (len(listGroups)-len(match)))
match.extend([''] * (len(listGroups) - len(match)))
scrapedurl = url_host+match[listGroups.index('url')] if 'url' in listGroups else ''
scrapedtitle = match[listGroups.index('title')] if 'title' in listGroups else ''
scrapedthumb = match[listGroups.index('thumb')] if 'thumb' in listGroups else ''
scrapedquality = match[listGroups.index('quality')] if 'quality' in listGroups else ''
scrapedyear = match[listGroups.index('year')] if 'year' in listGroups else ''
scrapedplot = match[listGroups.index('plot')] if 'plot' in listGroups else ''
scrapedduration = match[listGroups.index('duration')] if 'duration' in listGroups else ''
scrapedgenre = match[listGroups.index('genre')] if 'genre' in listGroups else ''
scrapedrating = match[listGroups.index('rating')] if 'rating' in listGroups else ''
scraped = {}
for kk in known_keys:
val = match[listGroups.index(kk)] if kk in listGroups else ''
if kk == "url":
val = url_host + val
scraped[kk] = val
title = scrapertoolsV2.decodeHtmlentities(scrapedtitle)
plot = scrapertoolsV2.decodeHtmlentities(scrapedplot)
if scrapedquality:
longtitle = '[B]' + title + '[/B] [COLOR blue][' + scrapedquality + '][/COLOR]'
title = scrapertoolsV2.decodeHtmlentities(scraped["title"]).strip()
plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))
if scraped["quality"]:
longtitle = '[B]' + title + '[/B] [COLOR blue][' + scraped["quality"] + '][/COLOR]'
else:
longtitle = '[B]' + title + '[/B]'
@@ -161,37 +160,48 @@ def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data=""
infolabels = item.infoLabels
else:
infolabels = {}
if scrapedyear:
infolabels['year'] = scrapedyear
if scrapedplot:
if scraped["year"]:
infolabels['year'] = scraped["year"]
if scraped["plot"]:
infolabels['plot'] = plot
if scrapedduration:
matches = scrapertoolsV2.find_multiple_matches(scrapedduration, r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
if scraped["duration"]:
matches = scrapertoolsV2.find_multiple_matches(scraped["duration"],r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
for h, m in matches:
scrapedduration = int(h) * 60 + int(m)
infolabels['duration'] = int(scrapedduration) * 60
if scrapedgenre:
genres = scrapertoolsV2.find_multiple_matches(scrapedgenre, '[A-Za-z]+')
infolabels['genre'] = ", ".join(genres)
if scrapedrating:
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scrapedrating)
scraped["duration"] = int(h) * 60 + int(m)
if not matches:
scraped["duration"] = scrapertoolsV2.find_single_match(scraped["duration"], r'(\d+)')
infolabels['duration'] = int(scraped["duration"]) * 60
if scraped["genere"]:
genres = scrapertoolsV2.find_multiple_matches(scraped["genere"], '[A-Za-z]+')
infolabels['genere'] = ", ".join(genres)
if scraped["rating"]:
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
if not scrapedtitle in blacklist:
itemlist.append(
Item(channel=item.channel,
action=action,
contentType=item.contentType,
title=longtitle,
fulltitle=title,
show=title,
quality=scrapedquality,
url=scrapedurl,
infoLabels=infolabels,
thumbnail=scrapedthumb
)
if scraped["title"] not in blacklist:
it = Item(
channel=item.channel,
action=action,
contentType=item.contentType,
title=longtitle,
fulltitle=title,
show=title,
quality=scraped["quality"],
url=scraped["url"],
infoLabels=infolabels,
thumbnail=scraped["thumb"]
)
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
for lg in list(set(listGroups).difference(known_keys)):
it.__setattr__(lg, match[listGroups.index(lg)])
itemlist.append(it)
if (item.contentType == "episode" and (action != "findvideos" and action != "play")) \
or (item.contentType == "movie" and action != "play"):
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
else:
for it in itemlist:
it.infoLabels = item.infoLabels
if patronNext:
nextPage(itemlist, item, data, patronNext, 2)