add support.py, a collection of helper functions

mac12m99
2019-03-24 21:06:22 +01:00
parent 537ee7be61
commit 66769d67f7
3 changed files with 244 additions and 265 deletions


@@ -1,36 +0,0 @@
{
"id": "filmhdstreaming",
"name": "Filmhdstreaming",
"active": true,
"adult": false,
"language": ["ita"],
"thumbnail": "http:\/\/hdcineblog01.com\/css\/images\/logo3.png",
"bannermenu": "http:\/\/hdcineblog01.com\/css\/images\/logo3.png",
"categories": ["movie"],
"settings": [
{
"id": "include_in_global_search",
"type": "bool",
"label": "Includi ricerca globale",
"default": false,
"enabled": false,
"visible": false
},
{
"id": "include_in_newest_peliculas",
"type": "bool",
"label": "Includi in Novità - Film",
"default": true,
"enabled": true,
"visible": true
},
{
"id": "include_in_newest_italiano",
"type": "bool",
"label": "Includi in Novità - Italiano",
"default": true,
"enabled": true,
"visible": true
}
]
}


@@ -1,229 +0,0 @@
# -*- coding: utf-8 -*-
# ------------------------------------------------------------
# Thanks to the Icarus crew
# Channel for filmhdstreaming
# ------------------------------------------------------------
import re
import urlparse
from core import httptools
from platformcode import logger, config
from core import scrapertools
from core.item import Item
from core import tmdb
host = "https://cb01.mobi/"
def mainlist(item):
logger.info("filmhdstreaming mainlist")
itemlist = []
# itemlist.append(Item(channel=item.channel, action="elenco_ten", title="[COLOR yellow]Film Top 10[/COLOR]", url=host,thumbnail=NovitaThumbnail, fanart=fanart))
# itemlist.append(Item(channel=item.channel, action="elenco_top", title="[COLOR azure]Film Top[/COLOR]", url=host,thumbnail=NovitaThumbnail, fanart=fanart))
itemlist.append(Item(channel=item.channel, action="elenco", title="[COLOR azure]Aggiornamenti Film[/COLOR]",
url=host + "/page/1.html", thumbnail=NovitaThumbnail, fanart=fanart))
itemlist.append(
Item(channel=item.channel, action="elenco_genere", title="[COLOR azure]Film per Genere[/COLOR]", url=host,
thumbnail=GenereThumbnail, fanart=fanart))
itemlist.append(
Item(channel=item.channel, action="search", title="[COLOR orange]Cerca film...[/COLOR]", extra="movie",
thumbnail=thumbcerca, fanart=fanart))
return itemlist
def newest(categoria):
logger.info("filmhdstreaming newest" + categoria)
itemlist = []
item = Item()
try:
if categoria == "film":
item.url = host + "/page/1.html"
item.action = "elenco"
itemlist = elenco(item)
if itemlist[-1].action == "elenco":
itemlist.pop()
    # Continue the search even if this category fails
except:
import sys
for line in sys.exc_info():
logger.error("{0}".format(line))
return []
return itemlist
def elenco_top(item):
logger.info("filmhdstreaming elenco_top")
itemlist = []
data = httptools.downloadpage(item.url).data
    # quick way to check what ends up in the cache:
    # open the Kodi log and look at the bottom...
    # Notepad++ works well for this, since it reloads the file on every change,
    # so you don't have to keep closing and reopening the Kodi log
    logger.info("full page ->" + data)
    # with this patron, everything between id="box_movies1"> and class="header_slider"> is captured
    patron = 'id="box_movies1">(.*?)class="header_slider">'
    filtro_top = scrapertools.find_single_match(data, patron)
    # log check
    logger.info("filtered ->" + filtro_top)
patron = 'class="movie">[^>]+><a href="(.*?)"><img src="(.*?)".*?<h2>(.*?)<\/h2>'
matches = scrapertools.find_multiple_matches(filtro_top, patron)
for scrapedurl, scrapedimg, scrapedtitle in matches:
        # again, just to check the log
logger.info("Url:" + scrapedurl + " thumbnail:" + scrapedimg + " title:" + scrapedtitle)
title = scrapedtitle.split("(")[0]
itemlist.append(Item(channel=item.channel,
action="findvideos",
title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
fulltitle=scrapedtitle,
url=scrapedurl,
thumbnail=scrapedimg,
fanart=""
))
return itemlist
def elenco(item):
logger.info("filmhdstreaming elenco")
itemlist = []
data = httptools.downloadpage(item.url).data
patron = r'<a href="([^"]+)" title="([^"]+)"><img src="([^"]+)"[^>]+>'
matches = re.compile(patron, re.DOTALL).findall(data)
for scrapedurl, scrapedtitle, scrapedthumbnail in matches:
scrapedplot = ""
scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)
scrapedtitle = scrapedtitle.replace(" streaming ita", "")
scrapedtitle = scrapedtitle.replace(" film streaming", "")
scrapedtitle = scrapedtitle.replace(" streaming gratis", "")
itemlist.append(
Item(channel=item.channel,
action="findvideos",
contentType="movie",
fulltitle=scrapedtitle,
show=scrapedtitle,
title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
url=scrapedurl,
thumbnail=scrapedthumbnail,
plot=scrapedplot,
folder=True))
    # Pagination
patronvideos = r'<a class="page dark gradient" href=["|\']+([^"]+)["|\']+>AVANTI'
matches = re.compile(patronvideos, re.DOTALL).findall(data)
if len(matches) > 0:
scrapedurl = urlparse.urljoin(re.sub(r'\d+.html$', '', item.url), matches[0])
itemlist.append(
Item(channel=item.channel,
action="elenco",
title="[COLOR lightgreen]" + config.get_localized_string(30992) + "[/COLOR]",
url=scrapedurl,
thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
folder=True))
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
return itemlist
def elenco_genere(item):
logger.info("filmhdstreaming elenco_genere")
itemlist = []
    # Load the page
data = httptools.downloadpage(item.url).data
bloque = scrapertools.get_match(data, '<ul>(.*?)</ul>')
    # Extract the entries
patron = '<li><a href="([^"]+)">[^>]+></i>\s*([^<]+)</a></li>'
matches = re.compile(patron, re.DOTALL).findall(bloque)
for scrapedurl, scrapedtitle in matches:
scrapedtitle = scrapedtitle.replace("Film streaming ", "")
itemlist.append(
Item(channel=item.channel,
action="elenco",
title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
url=scrapedurl,
thumbnail="http://orig03.deviantart.net/6889/f/2014/079/7/b/movies_and_popcorn_folder_icon_by_matheusgrilo-d7ay4tw.png",
folder=True))
return itemlist
def elenco_ten(item):
logger.info("filmhdstreaming elenco_ten")
itemlist = []
data = httptools.downloadpage(item.url).data
patron = '<ul class="lista">(.*?)</ul>'
filtro = scrapertools.find_single_match(data, patron)
patron = '<li>.*?href="(.*?)">(.*?)</a>'
matches = scrapertools.find_multiple_matches(filtro, patron)
for scrapedurl, scrapedtitle in matches:
logger.info("Url:" + scrapedurl + " title:" + scrapedtitle)
itemlist.append(Item(channel=item.channel,
action="findvideos",
title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
fulltitle=scrapedtitle,
url=scrapedurl,
thumbnail="",
fanart=""
))
return itemlist
def search(item, texto):
logger.info("filmhdstreaming search " + texto)
itemlist = []
item.url = host + "/search/" + texto
try:
return elenco(item)
    # Continue the search even on error
except:
import sys
for line in sys.exc_info():
logger.error("%s" % line)
return []
GenereThumbnail = "https://farm8.staticflickr.com/7562/15516589868_13689936d0_o.png"
NovitaThumbnail = "https://superrepo.org/static/images/icons/original/xplugin.video.moviereleases.png.pagespeed.ic.j4bhi0Vp3d.png"
thumbcerca = "http://dc467.4shared.com/img/fEbJqOum/s7/13feaf0c8c0/Search"
fanart = "https://superrepo.org/static/images/fanart/original/script.artwork.downloader.jpg"
AvantiTxt = config.get_localized_string(30992)
AvantiImg = "http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png"
thumbnovita = "http://orig03.deviantart.net/6889/f/2014/079/7/b/movies_and_popcorn_folder_icon_by_matheusgrilo-d7ay4tw.png"


@@ -0,0 +1,244 @@
# support functions shared by many channels, to avoid repeating the same code
import base64, urlparse, re
from core import httptools, scrapertoolsV2, servertools, tmdb
from core.item import Item
import urllib
from lib import unshortenit
from platformcode import logger, config
def hdpass_get_servers(item):
    # Load the page
data = httptools.downloadpage(item.url).data.replace('\n', '')
patron = r'<iframe(?: id="[^"]+")? width="[^"]+" height="[^"]+" src="([^"]+)"[^>]+><\/iframe>'
url = scrapertoolsV2.find_single_match(data, patron).replace("?alta", "")
url = url.replace("&download=1", "")
if 'hdpass' in url:
data = httptools.downloadpage(url).data
start = data.find('<div class="row mobileRes">')
end = data.find('<div id="playerFront">', start)
data = data[start:end]
patron_res = '<div class="row mobileRes">(.*?)</div>'
patron_mir = '<div class="row mobileMirrs">(.*?)</div>'
patron_media = r'<input type="hidden" name="urlEmbed" data-mirror="([^"]+)" id="urlEmbed" value="([^"]+)"\s*/>'
res = scrapertoolsV2.find_single_match(data, patron_res)
itemlist = []
for res_url, res_video in scrapertoolsV2.find_multiple_matches(res, '<option.*?value="([^"]+?)">([^<]+?)</option>'):
data = httptools.downloadpage(urlparse.urljoin(url, res_url)).data.replace('\n', '')
mir = scrapertoolsV2.find_single_match(data, patron_mir)
            for mir_url, server in scrapertoolsV2.find_multiple_matches(mir, '<option.*?value="([^"]+?)">([^<]+?)</option>'):
data = httptools.downloadpage(urlparse.urljoin(url, mir_url)).data.replace('\n', '')
for media_label, media_url in scrapertoolsV2.find_multiple_matches(data, patron_media):
itemlist.append(Item(channel=item.channel,
action="play",
title=item.title+"["+color(server, 'orange')+"]"+" - "+color(res_video, 'green'),
fulltitle=item.fulltitle,
quality=res_video,
show=item.show,
thumbnail=item.thumbnail,
contentType=item.contentType,
server=server,
url=url_decode(media_url)))
return itemlist
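# Channels whose pages embed an HDpass iframe can delegate findvideos to the
# helper above (sketch, for illustration only):
def _findvideos_hdpass_example(item):
    return hdpass_get_servers(item)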
def url_decode(url_enc):
    # the encoded string is a base64 value that was reversed and had its two
    # halves swapped; odd-length input carries one extra trailing character
    # that must be set aside before swapping
    length = len(url_enc)
    if length % 2 == 0:
        half = length // 2
        first = url_enc[0:half]
        last = url_enc[half:length]
        url_enc = last + first
        reverse = url_enc[::-1]
        return base64.b64decode(reverse)
    # odd length: strings are immutable, so drop the last character by
    # slicing instead of assigning into the string
    last_char = url_enc[length - 1]
    url_enc = url_enc[0:length - 1]
    half = len(url_enc) // 2
    first = url_enc[0:half]
    last = url_enc[half:len(url_enc)]
    url_enc = last + first
    reverse = url_enc[::-1]
    reverse = reverse + last_char
    return base64.b64decode(reverse)
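# A round-trip sketch of the scheme above (hypothetical URL, for illustration
# only): the encoder is simply the inverse — base64-encode, reverse, then
# swap the two halves.
def _url_encode_example(url):
    enc = base64.b64encode(url)[::-1]  # encode, then reverse
    half = len(enc) // 2
    return enc[half:] + enc[:half]     # swap the halves back to front
# url_decode(_url_encode_example('http://example.com/video')) == 'http://example.com/video'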
def color(text, color):
return "[COLOR " + color + "]" + text + "[/COLOR]"
def scrape(item, itemlist, patron, listGroups, headers="", blacklist="", data="", patron_block="", patronNext="", action="findvideos", url_host=""):
    # patron: the pattern used to scrape the page; every capturing group must have an entry in listGroups
    # listGroups: a list naming, in order, what each capturing group of your patron holds
    #             accepted values are: url, title, thumb, quality, year, plot, duration, genre
    # headers: values to pass in the request header
    # blacklist: titles you want to exclude (service articles, for example)
    # patron_block: pattern that isolates the block of the page to scrape
    # patronNext: pattern that scrapes the next-page link
    # action: action the results should perform instead of "findvideos"; useful when scraping films by genre
    # url_host: string to prepend to scrapedurl; useful when the url doesn't contain the host
    # returns the block that was scraped, for debugging purposes
    # example usage:
    # import support
    # itemlist = []
    # patron = 'blablabla'
    # headers = [['Referer', host]]
    # blacklist = 'Request a TV serie!'
    # support.scrape(item, itemlist, patron, ['thumb', 'quality', 'url', 'title', 'year', 'plot'], headers=headers, blacklist=blacklist)
    # return itemlist
if not data:
data = httptools.downloadpage(item.url, headers=headers).data.replace("'", '"')
        # replace every ' with " so the patron only has to handle one quote style
if patron_block:
block = scrapertoolsV2.get_match(data, patron_block)
else:
block = data
matches = scrapertoolsV2.find_multiple_matches(block, patron)
for match in matches:
scrapedurl = url_host+match[listGroups.index('url')] if 'url' in listGroups else ''
scrapedtitle = match[listGroups.index('title')] if 'title' in listGroups else ''
scrapedthumb = match[listGroups.index('thumb')] if 'thumb' in listGroups else ''
scrapedquality = match[listGroups.index('quality')] if 'quality' in listGroups else ''
scrapedyear = match[listGroups.index('year')] if 'year' in listGroups else ''
scrapedplot = match[listGroups.index('plot')] if 'plot' in listGroups else ''
scrapedduration = match[listGroups.index('duration')] if 'duration' in listGroups else ''
scrapedgenre = match[listGroups.index('genre')] if 'genre' in listGroups else ''
title = scrapertoolsV2.decodeHtmlentities(scrapedtitle)
plot = scrapertoolsV2.decodeHtmlentities(scrapedplot)
if scrapedquality:
longtitle = '[B]' + title + '[/B] [COLOR blue][' + scrapedquality + '][/COLOR]'
else:
longtitle = '[B]' + title + '[/B]'
infolabels = {}
if scrapedyear:
infolabels['year'] = scrapedyear
if scrapedplot:
infolabels['plot'] = plot
if scrapedduration:
infolabels['duration'] = scrapedduration
if scrapedgenre:
            infolabels['genre'] = scrapertoolsV2.find_multiple_matches(scrapedgenre, '(?:<[^<]+?>)?([^<>]+)')  # strip the html tags, keeping only the text
        if scrapedtitle not in blacklist:
itemlist.append(
Item(channel=item.channel,
action=action,
contentType=item.contentType,
title=longtitle,
fulltitle=title,
show=title,
quality=scrapedquality,
url=scrapedurl,
infoLabels=infolabels,
thumbnail=scrapedthumb
)
)
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
if patronNext:
next_page = scrapertoolsV2.find_single_match(data, patronNext)
logger.info('NEXT ' + next_page)
if next_page != "":
itemlist.append(
Item(channel=item.channel,
action="peliculas",
contentType=item.contentType,
title="[COLOR blue]" + config.get_localized_string(30992) + " >[/COLOR]",
url=next_page))
return block
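# A minimal channel sketch built on scrape() (hypothetical host and patron,
# for illustration only; real channels supply their own):
def _peliculas_example(item):
    itemlist = []
    patron = r'<a href="([^"]+)" title="([^"]+)"><img src="([^"]+)"'
    patronNext = r'<a class="next" href="([^"]+)"'
    scrape(item, itemlist, patron, ['url', 'title', 'thumb'], patronNext=patronNext)
    return itemlist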
def dooplay_get_links(item, host):
    # gets the links from sites that use the dooplay theme and its dooplay_player
    # returns a list of dicts with these keys: url, title and server
data = httptools.downloadpage(item.url).data.replace("'", '"')
patron = r'<li id="player-option-[0-9]".*?data-type="([^"]+)" data-post="([^"]+)" data-nume="([^"]+)".*?<span class="title".*?>([^<>]+)</span>(?:<span class="server">([^<>]+))?'
matches = scrapertoolsV2.find_multiple_matches(data, patron)
ret = []
for type, post, nume, title, server in matches:
postData = urllib.urlencode({
"action": "doo_player_ajax",
"post": post,
"nume": nume,
"type": type
})
        dataAdmin = httptools.downloadpage(host + 'wp-admin/admin-ajax.php', post=postData, headers={'Referer': item.url}).data
link = scrapertoolsV2.get_match(dataAdmin, "<iframe.*src='([^']+)'")
ret.append({
'url': link,
'title': title,
'server': server
})
return ret
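# Sketch of a findvideos() built on dooplay_get_links (hypothetical dooplay
# site, for illustration only): collect the embed links, then let servertools
# identify the hosting server for each one.
def _findvideos_dooplay_example(item):
    itemlist = []
    for link in dooplay_get_links(item, 'https://example-dooplay-site.com/'):
        itemlist.append(Item(channel=item.channel, action='play',
                             title=link['title'], fulltitle=item.fulltitle,
                             url=link['url']))
    return servertools.get_servers_itemlist(itemlist)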
def dooplay_films(item, itemlist, blacklist=""):
patron = '<article id="post-[0-9]+" class="item movies">.*?<img src="([^"]+)".*?<span class="quality">([^<>]+).*?<a href="([^"]+)">([^<>]+)</a></h3> (?:<span>([0-9]{4})</span>)?.*?(?:<span>([0-9]+) min</span>)?.*?(?:<div class="texto">([^<>]+).*?)?(?:genres">(.*?)</div>)?'
patronNext = '<a class="arrow_pag" href="([^"]+)"><i id="nextpagination"'
scrape(item, itemlist, patron, ['thumb', 'quality', 'url', 'title', 'year', 'duration', 'plot', 'genre'], blacklist=blacklist, patronNext=patronNext)
def dooplay_search(item, itemlist, blacklist=""):
patron = '<div class="result-item">.*?<img src="([^"]+)".*?<span class="movies">([^<>]+).*?<a href="([^"]+)">([^<>]+)</a>.*?<span class="year">([0-9]{4}).*?<div class="contenido"><p>([^<>]+)'
patronNext = '<a class="arrow_pag" href="([^"]+)"><i id="nextpagination"'
scrape(item, itemlist, patron, ['thumb', 'quality', 'url', 'title', 'year', 'plot'], blacklist=blacklist, patronNext=patronNext)
def swzz_get_url(item):
if "/link/" in item.url:
data = httptools.downloadpage(item.url).data
if "link =" in data:
data = scrapertoolsV2.get_match(data, 'link = "([^"]+)"')
else:
match = scrapertoolsV2.get_match(data, r'<meta name="og:url" content="([^"]+)"')
match = scrapertoolsV2.get_match(data, r'URL=([^"]+)">') if not match else match
if not match:
from lib import jsunpack
try:
data = scrapertoolsV2.get_match(data, r"(eval\s?\(function\(p,a,c,k,e,d.*?)</script>")
data = jsunpack.unpack(data)
logger.debug("##### play /link/ unpack ##\n%s\n##" % data)
except IndexError:
logger.debug("##### The content is yet unpacked ##\n%s\n##" % data)
data = scrapertoolsV2.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";')
data, c = unshortenit.unwrap_30x_only(data)
else:
data = match
if data.startswith('/'):
data = urlparse.urljoin("http://swzz.xyz", data)
data = httptools.downloadpage(data).data
logger.debug("##### play /link/ data ##\n%s\n##" % data)
else:
data = item.url
return data
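# Sketch of a findvideos() using swzz_get_url (for illustration only): resolve
# the /link/ redirect page first, then scan the resulting data for playable videos.
def _findvideos_swzz_example(item):
    data = swzz_get_url(item)
    return servertools.find_video_items(item=item, data=data)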