Merge pull request #10 from thepasto/master
Support scrape - improvements
@@ -4,77 +4,187 @@
# ------------------------------------------------------------

import re

from channels import filtertools
from core import scrapertools, servertools, httptools
from channels import filtertools, support, autoplay
from core import scrapertools, servertools, httptools, scrapertoolsV2
from core.item import Item
from platformcode import config
from core import tmdb

host = 'https://cinemastreaming.info'
host = 'https://cinemastreaming.icu'

IDIOMAS = {'Italiano': 'IT'}
list_language = IDIOMAS.values()
list_servers = ['openload', 'streamango']
list_quality = ['1080p', '1080p 3D', 'SD', 'CAM', 'default']

headers = [['Referer', host]]


def mainlist(item):
    log()
    support.log()

    # Main menu
    itemlist = []
    support.menu(itemlist, 'Film bold', 'peliculas', host + '/film/')
    support.menu(itemlist, 'Per genere submenu', 'menu', host, args="Film per Genere")
    support.menu(itemlist, 'Anime bold', 'peliculas', host + '/category/anime/')
    support.menu(itemlist, 'Serie TV bold', 'peliculas', host + '/serie-tv/', contentType='episode')
    support.menu(itemlist, 'Ultime Uscite submenu', 'peliculas', host + "/stagioni/", "episode", args='latests')
    support.menu(itemlist, 'Ultimi Episodi submenu', 'peliculas_latest_ep', host + "/episodi/", "episode", args='lateste')
    support.menu(itemlist, '[COLOR blue]Cerca...[/COLOR]', 'search')

    itemlist = [Item(channel=item.channel,
                     contentType='movie',
                     title='Film',
                     url=host + '/film/',
                     action='video',
                     thumbnail='',
                     fanart=''
                     ),
                ]

    return itemlist

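Note: mainlist builds its entries through support.menu rather than hand-built Item objects. The helper itself lives in support.py and is not part of this diff; the sketch below is only an inference from the call sites above, assuming that the trailing 'bold' / 'submenu' keywords in the title are presentation hints and that the remaining arguments map to action, url, contentType and args. All names here are illustrative, not the real implementation.

    # Illustrative sketch of the assumed support.menu contract (not the real code).
    from core.item import Item  # the addon's own Item class, imported above

    def menu_sketch(itemlist, title, action='', url='', contentType='movie', args=''):
        plain = title.replace(' bold', '').replace(' submenu', '')  # strip assumed style hints
        itemlist.append(Item(channel='cinemastreaming', title=plain, action=action,
                             url=url, contentType=contentType, args=args))
        return itemlist
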
def video(item):
    log()

    itemlist = []  # Create an empty list

    # Load the page
    data = httptools.downloadpage(item.url, headers=headers).data
    block = scrapertools.find_single_match(data, r'<main>(.*?)<\/main>')
    block = re.sub('\t|\n', '', block)

    patron = r'<article.*?class="TPost C">.*?<a href="([^"]+)">.*?src="([^"]+)".*?>.*?<h3 class="Title">([^<]+)<\/h3>(.*?)<\/article>'
    matches = re.compile(patron, re.DOTALL).findall(block)

    for scrapedurl, scrapedthumb, scrapedtitle, scrapedinfo in matches:
        log('Info Block', scrapedinfo)
        patron = r'<span class="Year">(.*?)<\/span>.*?<span class="Vote.*?">(.*?)<\/span>.*?<div class="Description"><p>(.*?)<\/p>.*?<p class="Genre.*?">(.*?)<\/p><p class="Director.*?">.*?<a.*?>(.*?)<\/a>.*?<p class="Actors.*?">(.*?)<\/p>'
        info = re.compile(patron, re.DOTALL).findall(scrapedinfo)
        for year, rating, plot, genre, director, cast in info:
            genre = scrapertools.find_multiple_matches(genre, r'<a.*?>(.*?)<\/a>')
            cast = scrapertools.find_multiple_matches(cast, r'<a.*?>(.*?)<\/a>')

            infoLabels = {}
            infoLabels['Year'] = year
            infoLabels['Rating'] = rating
            infoLabels['Plot'] = plot
            infoLabels['Genre'] = genre
            infoLabels['Director'] = director
            infoLabels['Cast'] = cast

            itemlist.append(
                Item(channel=item.channel,
                     action="findvideos",
                     contentType=item.contentType,
                     title=scrapedtitle,
                     fulltitle=scrapedtitle,
                     url=scrapedurl,
                     thumbnail=scrapedthumb,
                     infoLabels=infoLabels,
                     show=scrapedtitle))
    autoplay.init(item.channel, list_servers, list_quality)
    autoplay.show_option(item.channel, itemlist)

    return itemlist


def log(stringa1="", stringa2=""):
    import inspect, os
    from platformcode import logger
    logger.info("[" + os.path.basename(__file__) + "] - [" + inspect.stack()[1][3] + "] " + str(stringa1) + str(stringa2))


def peliculas(item):
    support.log()
    list_groups = ["url", "thumb", "title", "year", "rating", "duration"]

    patron = r'<article.*?"TPost C".*?href="([^"]+)".*?img.*?src="([^"]+)".*?<h3.*?>([^<]+).*?Year">'

    if item.args == "latests":
        patron += r'([^<]+)'
    else:
        patron += r'(\d{4}).*?AAIco-star.*?>([^<]+).*?AAIco-access_time">([^<]+).*?Qlty'

    patron_next = r'page-numbers current.*?href="([^"]+)"'

    if item.contentType == "movie":
        patron += r'\">([^<]+)'
        list_groups.append("quality")

    action = "findvideos" if item.contentType == "movie" else "episodios"

    return support.scrape(item, patron, list_groups, patronNext=patron_next, action=action)

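peliculas delegates the listing to support.scrape: the capture groups of patron are mapped, in order, onto the names in list_groups, and those named values populate the resulting Items (the matching code is in the support.py hunk further down). A minimal, self-contained illustration of that group-to-name mapping, using only re and a made-up one-entry sample of the site's markup:

    import re

    sample = ('<article class="TPost C"><a href="/film/demo"><img src="/demo.jpg">'
              '<h3 class="Title">Demo</h3><span class="Year">2019')  # fabricated sample
    patron = r'<article.*?"TPost C".*?href="([^"]+)".*?img.*?src="([^"]+)".*?<h3.*?>([^<]+).*?Year">([^<]+)'
    list_groups = ['url', 'thumb', 'title', 'year']

    for match in re.compile(patron, re.DOTALL).findall(sample):
        scraped = dict(zip(list_groups, match))
        print(scraped['title'] + ' (' + scraped['year'] + ')')  # -> Demo (2019)
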
def peliculas_latest_ep(item):

    patron = r'<article.*?"TPost C".*?href="([^"]+)".*?img.*?src="([^"]+)"'
    patron += r'.*?class="ClB">([^<]+)<\/span>([^<]+).*?<h3.*?>([^<]+)'

    data = httptools.downloadpage(item.url).data

    matches = re.compile(patron, re.DOTALL).findall(data)
    itemlist = []
    for scrapedurl, scrapedthumbnail, scrapednum, scrapedep, scrapedtitle in matches:
        itemlist.append(
            Item(channel=item.channel,
                 action="findvideos",
                 contentType=item.contentType,
                 title="[B]" + scrapednum + "[/B]" + scrapedep + " - " + scrapedtitle,
                 fulltitle=scrapedep + " " + scrapedtitle,
                 show=scrapedep + " " + scrapedtitle,
                 url=scrapedurl,
                 extra=item.extra,
                 thumbnail="http:" + scrapedthumbnail,
                 infoLabels=item.infoLabels
                 ))

    support.nextPage(itemlist, item, data, r'page-numbers current.*?href="([^"]+)"')

    return itemlist


def peliculas_menu(item):
    itemlist = peliculas(item)
    # Drop the trailing next-page entry appended by support.scrape
    return itemlist[:-1]


def episodios(item):
    # patron captures only url and title; support.scrape pads the missing 'year' group with ''
    patron = r'<td class="MvTbTtl"><a href="([^"]+)">(.*?)<\/a>.*?>\d{4}<'
    list_groups = ["url", "title", "year"]

    itemlist = support.scrape(item, patron, list_groups)

    for itm in itemlist:
        fixedtitle = scrapertools.get_season_and_episode(itm.url)
        itm.title = fixedtitle + " - " + itm.title
        itm.fulltitle = fixedtitle + " - " + itm.fulltitle

    return itemlist

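episodios prefixes each title with scrapertools.get_season_and_episode(itm.url). That helper comes from core and is not shown in this diff; the stand-in below only illustrates the assumed behavior, pulling an NxM season/episode marker out of the URL, and is not the real implementation.

    import re

    def get_season_and_episode_sketch(text):
        # Assumed: find a '1x05' / 's01e05' style marker and normalize it to '1x05'.
        m = re.search(r'(\d+)\s*x\s*(\d+)', text, re.I) or re.search(r's(\d+)\s*e(\d+)', text, re.I)
        return '%dx%02d' % (int(m.group(1)), int(m.group(2))) if m else ''

    print(get_season_and_episode_sketch('/serie-tv/demo-1x05/'))  # -> 1x05
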
def menu(item):
    patron_block = r'<ul class="sub-menu">.*?</ul>'
    patron = r'menu-category-list"><a href="([^"]+)">([^<]+)<'
    list_groups = ["url", "title"]

    return support.scrape(item, patron, list_groups, blacklist="Anime", action="peliculas_menu", patron_block=patron_block)


def search(item, texto):
    support.log("s=", texto)
    item.url = host + "/?s=" + texto
    try:
        return peliculas(item)
    # Keep the global search going if this channel errors out
    except Exception, e:
        import traceback
        traceback.print_stack()
        support.log(str(e))
        return []


def newest(categoria):
    support.log("newest " + categoria)
    itemlist = []
    item = Item()
    try:
        if categoria == "series":
            item.url = host + "/episodi/"
            item.action = "peliculas"
            item.args = "lateste"
            item.contentType = "episode"
            itemlist = peliculas(item)

            # Drop the next-page entry if peliculas appended one
            if itemlist[-1].action == "peliculas":
                itemlist.pop()

    # Keep the global search going if this channel errors out
    except Exception, e:
        import traceback
        traceback.print_stack()
        support.log(str(e))
        return []

    return itemlist


def findvideos(item):

    if item.quality.lower() in ["ended", "canceled", "returning series"]:
        return episodios(item)

    itemlist = []
    data = scrapertoolsV2.decodeHtmlentities(httptools.downloadpage(item.url).data)
    btns = re.compile(r'data-tplayernv="Opt.*?><span>([^<]+)</span><span>([^<]+)</span>', re.DOTALL).findall(data)
    matches = re.compile(r'<iframe.*?src="([^"]+trembed=[^"]+)', re.DOTALL).findall(data)
    for i, scrapedurl in enumerate(matches):

        scrapedurl = scrapertoolsV2.decodeHtmlentities(scrapedurl)
        patron = r'<iframe.*?src="([^"]+)"'
        link_data = httptools.downloadpage(scrapedurl).data
        url = scrapertoolsV2.find_single_match(link_data, patron)

        itemlist.append(
            Item(channel=item.channel,
                 action="play",
                 contentType=item.contentType,
                 title="[B]" + btns[i][0] + "[/B] - " + btns[i][1],
                 fulltitle=btns[i][0] + " " + btns[i][1],
                 show=btns[i][0] + " " + btns[i][1],
                 url=url,
                 extra=item.extra,
                 infoLabels=item.infoLabels,
                 server=btns[i][0],
                 contentQuality=btns[i][1].replace('Italiano - ', ''),
                 ))

    if item.contentType == "movie":
        support.videolibrary(itemlist, item)
    autoplay.start(itemlist, item)

    return itemlist

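findvideos runs two independent regex passes over the page, one for the visible server/quality buttons (data-tplayernv) and one for the trembed iframe URLs, and pairs them purely by position (btns[i] for matches[i]); each trembed URL is then downloaded again to extract the final iframe src. The snippet below demonstrates just the positional pairing on fabricated markup (the sample HTML is invented; on a real page the second download step would follow):

    import re

    sample = ('<li data-tplayernv="Opt1"><span>openload</span><span>Italiano - 1080p</span></li>'
              '<iframe src="https://cinemastreaming.icu/?trembed=0&trid=1">')  # fabricated sample

    btns = re.compile(r'data-tplayernv="Opt.*?><span>([^<]+)</span><span>([^<]+)</span>', re.DOTALL).findall(sample)
    matches = re.compile(r'<iframe.*?src="([^"]+trembed=[^"]+)', re.DOTALL).findall(sample)

    for i, scrapedurl in enumerate(matches):
        server, quality = btns[i]  # positional pairing: button i <-> iframe i
        print('%s | %s | %s' % (server, quality.replace('Italiano - ', ''), scrapedurl))
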
@@ -135,25 +135,24 @@ def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data=""
    matches = scrapertoolsV2.find_multiple_matches(block, patron)
    log('MATCHES =', matches)

    known_keys = ['url', 'title', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating']
    for match in matches:
        if len(listGroups) > len(match):  # to fix a bug
            match = list(match)
            match.extend([''] * (len(listGroups)-len(match)))
            match.extend([''] * (len(listGroups) - len(match)))

        scrapedurl = url_host+match[listGroups.index('url')] if 'url' in listGroups else ''
        scrapedtitle = match[listGroups.index('title')] if 'title' in listGroups else ''
        scrapedthumb = match[listGroups.index('thumb')] if 'thumb' in listGroups else ''
        scrapedquality = match[listGroups.index('quality')] if 'quality' in listGroups else ''
        scrapedyear = match[listGroups.index('year')] if 'year' in listGroups else ''
        scrapedplot = match[listGroups.index('plot')] if 'plot' in listGroups else ''
        scrapedduration = match[listGroups.index('duration')] if 'duration' in listGroups else ''
        scrapedgenre = match[listGroups.index('genre')] if 'genre' in listGroups else ''
        scrapedrating = match[listGroups.index('rating')] if 'rating' in listGroups else ''
        scraped = {}
        for kk in known_keys:
            val = match[listGroups.index(kk)] if kk in listGroups else ''
            if kk == "url":
                val = url_host + val
            scraped[kk] = val

        title = scrapertoolsV2.decodeHtmlentities(scrapedtitle)
        plot = scrapertoolsV2.decodeHtmlentities(scrapedplot)
        if scrapedquality:
            longtitle = '[B]' + title + '[/B] [COLOR blue][' + scrapedquality + '][/COLOR]'
        title = scrapertoolsV2.decodeHtmlentities(scraped["title"]).strip()
        plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))

        if scraped["quality"]:
            longtitle = '[B]' + title + '[/B] [COLOR blue][' + scraped["quality"] + '][/COLOR]'
        else:
            longtitle = '[B]' + title + '[/B]'

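The change above collapses nine parallel scraped* locals into a single scraped dict driven by known_keys, after padding short match tuples so every index lookup stays in range. Reduced to its essentials (url_host stands in for scrape()'s real base-URL variable, and the sample tuple is invented):

    known_keys = ['url', 'title', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating']
    listGroups = ['url', 'thumb', 'title', 'year']       # what this channel captures
    match = ['/film/demo', '/demo.jpg', 'Demo']          # one match; the 'year' group is missing
    url_host = 'https://cinemastreaming.icu'             # stands in for the real variable

    match.extend([''] * (len(listGroups) - len(match)))  # pad so index lookups are safe

    scraped = {}
    for kk in known_keys:
        val = match[listGroups.index(kk)] if kk in listGroups else ''
        if kk == 'url':
            val = url_host + val
        scraped[kk] = val

    print(scraped['url'])     # -> https://cinemastreaming.icu/film/demo
    print(scraped['rating'])  # -> '' (never captured by this channel)
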
@@ -161,37 +160,48 @@ def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data=""
            infolabels = item.infoLabels
        else:
            infolabels = {}
        if scrapedyear:
            infolabels['year'] = scrapedyear
        if scrapedplot:
        if scraped["year"]:
            infolabels['year'] = scraped["year"]
        if scraped["plot"]:
            infolabels['plot'] = plot
        if scrapedduration:
            matches = scrapertoolsV2.find_multiple_matches(scrapedduration, r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
        if scraped["duration"]:
            matches = scrapertoolsV2.find_multiple_matches(scraped["duration"], r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
            for h, m in matches:
                scrapedduration = int(h) * 60 + int(m)
            infolabels['duration'] = int(scrapedduration) * 60
        if scrapedgenre:
            genres = scrapertoolsV2.find_multiple_matches(scrapedgenre, '[A-Za-z]+')
            infolabels['genre'] = ", ".join(genres)
        if scrapedrating:
            infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scrapedrating)
                scraped["duration"] = int(h) * 60 + int(m)
            if not matches:
                scraped["duration"] = scrapertoolsV2.find_single_match(scraped["duration"], r'(\d+)')
            infolabels['duration'] = int(scraped["duration"]) * 60
        if scraped["genere"]:
            genres = scrapertoolsV2.find_multiple_matches(scraped["genere"], '[A-Za-z]+')
            infolabels['genere'] = ", ".join(genres)
        if scraped["rating"]:
            infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])

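The duration handling above now covers two shapes of input: an hour/minute pair separated by h, :, ., and similar characters (converted to minutes), or, when that pattern is absent, a bare number taken as minutes; either way the minute count is multiplied by 60 before being stored in infolabels['duration']. Standalone, with the same regex:

    import re

    def duration_to_seconds(raw):
        pairs = re.findall(r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)', raw)
        minutes = raw
        for h, m in pairs:
            minutes = int(h) * 60 + int(m)               # '1h 53' -> 113 minutes
        if not pairs:
            minutes = re.search(r'(\d+)', raw).group(1)  # bare '113' -> 113 minutes
        return int(minutes) * 60

    print(duration_to_seconds('1h 53'))  # -> 6780
    print(duration_to_seconds('113'))    # -> 6780
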
        if not scrapedtitle in blacklist:
            itemlist.append(
                Item(channel=item.channel,
                     action=action,
                     contentType=item.contentType,
                     title=longtitle,
                     fulltitle=title,
                     show=title,
                     quality=scrapedquality,
                     url=scrapedurl,
                     infoLabels=infolabels,
                     thumbnail=scrapedthumb
                     )
        if scraped["title"] not in blacklist:
            it = Item(
                channel=item.channel,
                action=action,
                contentType=item.contentType,
                title=longtitle,
                fulltitle=title,
                show=title,
                quality=scraped["quality"],
                url=scraped["url"],
                infoLabels=infolabels,
                thumbnail=scraped["thumb"]
                )

    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)

            for lg in list(set(listGroups).difference(known_keys)):
                it.__setattr__(lg, match[listGroups.index(lg)])

            itemlist.append(it)

    if (item.contentType == "episode" and (action != "findvideos" and action != "play")) \
            or (item.contentType == "movie" and action != "play"):
        tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    else:
        for it in itemlist:
            it.infoLabels = item.infoLabels

    if patronNext:
        nextPage(itemlist, item, data, patronNext, 2)