some fixes for cineblog01

This commit is contained in:
mac12m99
2019-03-16 12:31:07 +01:00
parent 16f14ff6b1
commit 65d0ecb3d8
+48 -39
View File
@@ -8,7 +8,7 @@ import re
import urlparse import urlparse
from channels import autoplay, filtertools from channels import autoplay, filtertools
from core import scrapertools, httptools, servertools, tmdb from core import scrapertoolsV2, httptools, servertools, tmdb
from core.item import Item from core.item import Item
from lib import unshortenit from lib import unshortenit
from platformcode import logger, config from platformcode import logger, config
@@ -117,7 +117,7 @@ def menu(item):
itemlist= [] itemlist= []
data = httptools.downloadpage(item.url, headers=headers).data data = httptools.downloadpage(item.url, headers=headers).data
data = re.sub('\n|\t','',data) data = re.sub('\n|\t','',data)
block = scrapertools.get_match(data, item.extra + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>') block = scrapertoolsV2.get_match(data, item.extra + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>')
patron = r'href="([^"]+)">(.*?)<\/a>' patron = r'href="([^"]+)">(.*?)<\/a>'
matches = re.compile(patron, re.DOTALL).findall(block) matches = re.compile(patron, re.DOTALL).findall(block)
for scrapedurl, scrapedtitle in matches: for scrapedurl, scrapedtitle in matches:
@@ -157,7 +157,7 @@ def newest(categoria):
# Carica la pagina # Carica la pagina
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
logger.info("[cineblog01.py] DATA: "+data) logger.info("[cineblog01.py] DATA: "+data)
blocco = scrapertools.get_match(data, r'Ultimi 100 film aggiunti:.*?<\/td>') blocco = scrapertoolsV2.get_match(data, r'Ultimi 100 film aggiunti:.*?<\/td>')
patron = r'<a href="([^"]+)">([^<]+)<\/a>' patron = r'<a href="([^"]+)">([^<]+)<\/a>'
matches = re.compile(patron, re.DOTALL).findall(blocco) matches = re.compile(patron, re.DOTALL).findall(blocco)
@@ -187,20 +187,21 @@ def video(item):
data = httptools.downloadpage(item.url, headers=headers).data data = httptools.downloadpage(item.url, headers=headers).data
data = re.sub('\n|\t','',data) data = re.sub('\n|\t','',data)
block = scrapertools.get_match(data, r'<div class="sequex-page-left">(.*?)<aside class="sequex-page-right">') block = scrapertoolsV2.get_match(data, r'<div class="sequex-page-left">(.*?)<aside class="sequex-page-right">')
if item.contentType == 'movie' or '/serietv/' not in item.url: if item.contentType == 'movie' or '/serietv/' not in item.url:
action = 'findvideos' action = 'findvideos'
logger.info("### FILM ###") logger.info("### FILM ###")
patron = r'type-post.*?>.*?<img src="([^"]+)".*?<h3.*?<a href="([^"]+)">([^<]+)<\/a>.*?<strong>([^<]+)<.*?br \/>\s+(.*?) ' patron = r'type-post.*?>.*?<img src="([^"]+)".*?<h3.*?<a href="([^"]+)">([^<]+)<\/a>.*?<strong>([^<]+)<.*?br \/>\s+(.*?) '
matches = re.compile(patron, re.DOTALL).findall(block) matches = re.compile(patron, re.DOTALL).findall(block)
logger.info("### MATCHES ###" + str(matches)) logger.info("### MATCHES ###" + str(matches))
for scrapedthumb, scrapedurl, scrapedtitle, scrapedinfo, scrapedplot in matches: for scrapedthumb, scrapedurl, scrapedtitle, scrapedinfo, scrapedplot in matches:
title = re.sub(r'(?:\[HD/?3?D?\]|\[Sub-ITA\])', '', scrapedtitle) title = re.sub(r'(?:\[HD/?3?D?\]|\[Sub-ITA\])', '', scrapedtitle)
year = scrapertools.find_single_match(scrapedtitle, r'\((\d{4})\)') year = scrapertoolsV2.find_single_match(scrapedtitle, r'\((\d{4})\)')
quality = scrapertools.find_single_match(scrapedtitle, r'\[(.*?)\]') quality = scrapertoolsV2.find_single_match(scrapedtitle, r'\[(.*?)\]')
genre = scrapertools.find_single_match(scrapedinfo, '([A-Z]+) &') genre = scrapertoolsV2.remove_htmltags(scrapertoolsV2.find_single_match(scrapedinfo, '([A-Z]+) &'))
duration = scrapertools.find_single_match(scrapedinfo,'DURATA ([0-9]+)&') duration = scrapertoolsV2.find_single_match(scrapedinfo,'DURATA ([0-9]+)&')
infolabels = {} infolabels = {}
if year: if year:
@@ -215,7 +216,7 @@ def video(item):
else: else:
longtitle = '[B]' + title + '[/B]' longtitle = '[B]' + title + '[/B]'
infolabels['Plot'] = scrapedplot + '...' infolabels['Plot'] = scrapertoolsV2.decodeHtmlentities(scrapedplot) + '...'
if not scrapedtitle in blacklist: if not scrapedtitle in blacklist:
itemlist.append( itemlist.append(
@@ -236,16 +237,15 @@ def video(item):
matches = re.compile(patron, re.DOTALL).findall(block) matches = re.compile(patron, re.DOTALL).findall(block)
for match in matches: for match in matches:
patron = r'<img src="([^"]+)".*?<h3.*?<a href="([^"]+)">([^<]+)<\/a>.*?<p>(.*?)\(([0-9]+).*?\) (.*?)<\/p>' patron = r'<img src="([^"]+)".*?<h3.*?<a href="([^"]+)">([^<]+)<\/a>.*?<p>(.*?)\(([0-9]+).*?\).*?<\/p>([^<>]*)(?:<\/p>)?'
matches = re.compile(patron, re.DOTALL).findall(match) matches = re.compile(patron, re.DOTALL).findall(match)
for scrapedthumb, scrapedurl, scrapedtitle, scrapedgenre, scrapedyear, scrapedplot in matches: for scrapedthumb, scrapedurl, scrapedtitle, scrapedgenre, scrapedyear, scrapedplot in matches:
longtitle = '[B]' + scrapedtitle + '[/B]' longtitle = '[B]' + scrapedtitle + '[/B]'
title = scrapedtitle title = scrapedtitle
infolabels = {} infolabels = {}
infolabels['Year'] = scrapedyear infolabels['Year'] = scrapedyear
infolabels['Genre'] = scrapedgenre infolabels['Genre'] = scrapertoolsV2.remove_htmltags(scrapedgenre)
infolabels['Plot'] = scrapedplot infolabels['Plot'] = scrapertoolsV2.decodeHtmlentities(scrapedplot)
if not scrapedtitle in blacklist: if not scrapedtitle in blacklist:
itemlist.append( itemlist.append(
Item(channel=item.channel, Item(channel=item.channel,
@@ -261,9 +261,9 @@ def video(item):
) )
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True) tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
patron = "<a class='page-link'" + ' href="(.*?)"><i class="fa fa-angle-right">' patron = "<a class='page-link'" + ' href="([^"]+)"><i class="fa fa-angle-right">'
next_page = scrapertools.find_single_match(data, patron) next_page = scrapertoolsV2.find_single_match(data, patron)
logger.info('NEXT '+next_page) logger.info('NEXT '+next_page)
if next_page != "": if next_page != "":
@@ -282,22 +282,31 @@ def episodios(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url, headers=headers).data data = httptools.downloadpage(item.url, headers=headers).data
data = re.sub('\n|\t','',data) data = re.sub('\n|\t','',data)
block = scrapertools.get_match(data, r'<article class="sequex-post-content">(.*?)<\/article>').replace('&#215;','x').replace(' &#8211; ','') block = scrapertoolsV2.get_match(data, r'<article class="sequex-post-content">(.*?)<\/article>').replace('&#215;','x').replace(' &#8211; ','')
patron = r'<p>([0-9]+x[0-9]+)(.*?)<\/p>' logger.info(block)
matches = re.compile(patron, re.DOTALL).findall(block) blockSeason = scrapertoolsV2.find_multiple_matches(block, '<div class="sp-head[a-z ]*?" title="Espandi">([^<>]*?)</div>(.*?)<div class="spdiv">\[riduci\]</div>')
for scrapedtitle, scrapedurl in matches: for season, block in blockSeason:
title = '[B]' + scrapedtitle + '[/B] - ' + item.title patron = r'(?:<p>)?([0-9]+x[0-9]+)(.*?)(?:</p>|<br)'
itemlist.append( matches = re.compile(patron, re.DOTALL).findall(block)
Item(channel=item.channel, for scrapedtitle, scrapedurl in matches:
action="findvideos", title = '[B]' + scrapedtitle + '[/B] - ' + item.title + (' (SUB ITA)' if 'SUB ITA' in season else ' (ITA)')
contentType=item.contentType, itemlist.append(
title=title, Item(channel=item.channel,
fulltitle=title, action="findvideos",
show=title, contentType=item.contentType,
url=scrapedurl, title=title,
fulltitle=item.fulltitle,
show=item.fulltitle,
url=scrapedurl,
)
) )
)
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
itemlist.append(
Item(channel=item.channel, title='[COLOR yellow][B]'+config.get_localized_string(30161)+'[/B][/COLOR]', url=item.url,
action="add_serie_to_library", extra="episodios", show=item.show))
return itemlist return itemlist
@@ -305,7 +314,7 @@ def findvideos(item):
if item.contentType == "episode": if item.contentType == "episode":
return findvid_serie(item) return findvid_serie(item)
def load_links(itemlist, re_txt, color, desc_txt, quality=""): def load_links(itemlist, re_txt, color, desc_txt, quality=""):
streaming = scrapertools.find_single_match(data, re_txt) streaming = scrapertoolsV2.find_single_match(data, re_txt)
patron = '<td><a[^h]href="([^"]+)"[^>]+>([^<]+)<' patron = '<td><a[^h]href="([^"]+)"[^>]+>([^<]+)<'
matches = re.compile(patron, re.DOTALL).findall(streaming) matches = re.compile(patron, re.DOTALL).findall(streaming)
for scrapedurl, scrapedtitle in matches: for scrapedurl, scrapedtitle in matches:
@@ -336,7 +345,7 @@ def findvideos(item):
matches = re.compile(patronvideos, re.DOTALL).finditer(data) matches = re.compile(patronvideos, re.DOTALL).finditer(data)
QualityStr = "" QualityStr = ""
for match in matches: for match in matches:
QualityStr = scrapertools.unescape(match.group(1))[6:] QualityStr = scrapertoolsV2.decodeHtmlentities(match.group(1))[6:]
# Estrae i contenuti - Streaming # Estrae i contenuti - Streaming
load_links(itemlist, '<strong>Streaming:</strong>(.*?)<table class="cbtable" height="30">', "orange", "Streaming", "SD") load_links(itemlist, '<strong>Streaming:</strong>(.*?)<table class="cbtable" height="30">', "orange", "Streaming", "SD")
@@ -375,8 +384,8 @@ def findvideos(item):
if item.contentType != 'episode': if item.contentType != 'episode':
if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos': if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
itemlist.append( itemlist.append(
Item(channel=item.channel, title='[COLOR yellow][B]Aggiungi alla videoteca[/B][/COLOR]', url=item.url, Item(channel=item.channel, title='[COLOR yellow][B]'+config.get_localized_string(30161)+'[/B][/COLOR]', url=item.url,
action="add_pelicula_to_library", extra="findvideos", contentTitle=item.contentTitle)) action="add_pelicula_to_library", extra="findvideos", contentTitle=item.fulltitle))
return itemlist return itemlist
@@ -461,12 +470,12 @@ def play(item):
if "go.php" in item.url: if "go.php" in item.url:
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
try: try:
data = scrapertools.get_match(data, 'window.location.href = "([^"]+)";') data = scrapertoolsV2.get_match(data, 'window.location.href = "([^"]+)";')
except IndexError: except IndexError:
try: try:
# data = scrapertools.get_match(data, r'<a href="([^"]+)">clicca qui</a>') # data = scrapertoolsV2.get_match(data, r'<a href="([^"]+)">clicca qui</a>')
# In alternativa, dato che a volte compare "Clicca qui per proseguire": # In alternativa, dato che a volte compare "Clicca qui per proseguire":
data = scrapertools.get_match(data, r'<a href="([^"]+)".*?class="btn-wrapper">.*?licca.*?</a>') data = scrapertoolsV2.get_match(data, r'<a href="([^"]+)".*?class="btn-wrapper">.*?licca.*?</a>')
except IndexError: except IndexError:
data = httptools.downloadpage(item.url, only_headers=True, follow_redirects=False).headers.get( data = httptools.downloadpage(item.url, only_headers=True, follow_redirects=False).headers.get(
"location", "") "location", "")
@@ -477,13 +486,13 @@ def play(item):
from lib import jsunpack from lib import jsunpack
try: try:
data = scrapertools.get_match(data, r"(eval\(function\(p,a,c,k,e,d.*?)</script>") data = scrapertoolsV2.get_match(data, r"(eval\(function\(p,a,c,k,e,d.*?)</script>")
data = jsunpack.unpack(data) data = jsunpack.unpack(data)
logger.debug("##### play /link/ unpack ##\n%s\n##" % data) logger.debug("##### play /link/ unpack ##\n%s\n##" % data)
except IndexError: except IndexError:
logger.debug("##### The content is yet unpacked ##\n%s\n##" % data) logger.debug("##### The content is yet unpacked ##\n%s\n##" % data)
data = scrapertools.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";') data = scrapertoolsV2.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";')
data, c = unshortenit.unwrap_30x_only(data) data, c = unshortenit.unwrap_30x_only(data)
if data.startswith('/'): if data.startswith('/'):
data = urlparse.urljoin("http://swzz.xyz", data) data = urlparse.urljoin("http://swzz.xyz", data)