documentalesonline: updated

This commit is contained in:
Intel1
2018-04-17 11:17:38 -05:00
committed by GitHub
parent 9f314ceba2
commit ceed742ca9
@@ -4,55 +4,79 @@ import re
from core import httptools from core import httptools
from core import scrapertools from core import scrapertools
from core import servertools
from core.item import Item from core.item import Item
from platformcode import logger
from channelselector import get_thumb from channelselector import get_thumb
from platformcode import logger
# Base URL of the scraped site; the menu entries and the search URL in this
# file are built from it.
HOST = "http://documentales-online.com/" HOST = "http://documentales-online.com/"
# Build the channel's root menu.
#
# Returns a list of Item entries, one per menu option: "Novedades",
# "Destacados", the series section, "Categorías", "Top 100", "Populares",
# "Series y Temas" and "Buscar". Every entry carries the caller's channel, an
# `action` whose value matches a function name defined in this file, a
# thumbnail from get_thumb() and (except "Buscar") a `url` built from HOST.
#
# NOTE(review): this capture shows the pre- and post-commit text of each line
# side by side, so several entries are visible in two variants (for example the
# "Novedades" action and the title of the series entry).
def mainlist(item): def mainlist(item):
logger.info() logger.info()
itemlist = list() itemlist = list()
itemlist.append(Item(channel=item.channel, title="Novedades", action="videos", url=HOST,
itemlist.append(Item(channel=item.channel, title="Novedades", action="listado", url=HOST,
thumbnail=get_thumb('newest', auto=True))) thumbnail=get_thumb('newest', auto=True)))
itemlist.append(Item(channel=item.channel, title="Destacados", action="seccion", url=HOST, extra="destacados", itemlist.append(Item(channel=item.channel, title="Destacados", action="seccion", url=HOST, extra="destacados",
thumbnail=get_thumb('hot', auto=True))) thumbnail=get_thumb('hot', auto=True)))
itemlist.append(Item(channel=item.channel, title="Series", action="seccion", url=HOST, extra="series", itemlist.append(Item(channel=item.channel, title="Series destacadas", action="seccion", url=HOST, extra="series",
thumbnail=get_thumb('tvshows', auto=True))) thumbnail=get_thumb('tvshows', auto=True)))
itemlist.append(Item(channel=item.channel, title="Categorías", action="categorias", url=HOST, itemlist.append(Item(channel=item.channel, title="Categorías", action="categorias", url=HOST,
thumbnail=get_thumb('categories', auto=True))) thumbnail=get_thumb('categories', auto=True)))
# itemlist.append(Item(channel=item.channel, title="Top 100", action="categorias", url=HOST)) itemlist.append(Item(channel=item.channel, title="Top 100", action="listado", url=HOST + "top/",
# itemlist.append(Item(channel=item.channel, title="Populares", action="categorias", url=HOST)) thumbnail=get_thumb('more voted', auto=True)))
itemlist.append(Item(channel=item.channel, title="Populares", action="listado", url=HOST + "populares/",
thumbnail=get_thumb('more watched', auto=True)))
itemlist.append(Item(channel=item.channel, title="Series y Temas", action="listado", url=HOST + "series-temas/",
thumbnail=get_thumb('tvshows', auto=True)))
itemlist.append(Item(channel=item.channel, title="Buscar", action="search", itemlist.append(Item(channel=item.channel, title="Buscar", action="search",
thumbnail=get_thumb('search', auto=True))) thumbnail=get_thumb('search', auto=True)))
return itemlist
# itemlist.append(Item(channel=item.channel, title=" Series y Temas", action="categorias", url=HOST))
# List the links found in the "post-entry" section of the page at item.url.
#
# The page is downloaded, every '<span class="wpp-views">' opening tag is
# removed, and only the text between 'class="post-entry' and
# 'class="post-share' is scanned.
#
# - When "series-temas" is NOT in item.url, each match yields a double-quoted
#   href, the link's title attribute and the text that follows the closing
#   </a>; the entry title is the title attribute plus that trailing text, and
#   the entry opens with action "findvideos".
# - Otherwise each match yields a single-quoted href and the link text, and the
#   entry opens with action "videos".
#
# Returns the list of Item entries that were built (empty when nothing matched).
#
# NOTE(review): removing the wpp-views tag presumably lets the `/a>([^<]+)<`
# group capture the text that follows each link — confirm against the page.
# NOTE(review): both branches build the url as HOST + scraped href, which
# assumes the scraped hrefs are relative to HOST — confirm; an absolute href
# would end up prefixed with HOST a second time.
def listado(item):
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
data = data.replace('<span class="wpp-views">', '')
bloque = scrapertools.find_single_match(data, 'class="post-entry(.*?)class="post-share')
if "series-temas" not in item.url:
patron = '<a href="([^"]+)".*?'
patron += 'title="([^"]+)".*?'
patron += '/a>([^<]+)<'
matches = scrapertools.find_multiple_matches(bloque, patron)
for scrapedurl, scrapedtitle, scrapedextra in matches:
itemlist.append(Item(action = "findvideos",
channel = item.channel,
title = scrapedtitle + scrapedextra,
url = HOST + scrapedurl
))
else:
patron = """<a href='([^']+)'.*?"""
patron += """>([^<]+)<.*?"""
matches = scrapertools.find_multiple_matches(bloque, patron)
for scrapedurl, scrapedtitle in matches:
itemlist.append(Item(action = "videos",
channel = item.channel,
title = scrapedtitle,
url = HOST + scrapedurl
))
return itemlist return itemlist
def seccion(item): def seccion(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data) data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data)
if item.extra == "destacados": if item.extra == "destacados":
patron_seccion = '<h4 class="widget-title">Destacados</h4><div class="textwidget"><ul>(.*?)</ul>' patron_seccion = '<h4 class="widget-title">Destacados</h4><div class="textwidget"><ul>(.*?)</ul>'
action = "findvideos" action = "findvideos"
else: else:
patron_seccion = '<h4 class="widget-title">Series destacadas</h4><div class="textwidget"><ul>(.*?)</ul>' patron_seccion = '<h4 class="widget-title">Series destacadas</h4><div class="textwidget"><ul>(.*?)</ul>'
action = "listado" action = "videos"
data = scrapertools.find_single_match(data, patron_seccion) data = scrapertools.find_single_match(data, patron_seccion)
matches = scrapertools.find_multiple_matches(data, '<a href="([^"]+)">(.*?)</a>')
matches = re.compile('<a href="([^"]+)">(.*?)</a>', re.DOTALL).findall(data)
aux_action = action aux_action = action
for url, title in matches: for url, title in matches:
if item.extra != "destacados" and "Cosmos (Carl Sagan)" in title: if item.extra != "destacados" and "Cosmos (Carl Sagan)" in title:
@@ -60,61 +84,46 @@ def seccion(item):
else: else:
action = aux_action action = aux_action
itemlist.append(item.clone(title=title, url=url, action=action, fulltitle=title)) itemlist.append(item.clone(title=title, url=url, action=action, fulltitle=title))
return itemlist return itemlist
# List the videos shown in the '<ul class="sp-grid">' grid of the page at
# item.url and, when a next-page link is found, append a
# ">> Página siguiente" entry.
#
# The downloaded HTML is first flattened with re.sub (newlines, carriage
# returns, tabs, two-whitespace runs and "- " sequences are removed) so the
# patterns below can match across what were separate lines.
#
# Each grid match yields (url, title, thumbnail url) and becomes a clone of
# `item` with action "findvideos". The pagination entry is a clone of `item`
# that only overrides title and url.
#
# NOTE(review): this capture shows the pre- and post-commit text side by side;
# the function appears under two names (listado / videos) and with two primary
# pagination patterns (rel='next' link / '<div class="older">' link).
# NOTE(review): the fallback pagination pattern uses a single `\d`, so it only
# matches while the current page number is one digit — confirm this is intended.
def listado(item): def videos(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data) data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data)
pagination = scrapertools.find_single_match(data, "rel='next' href='([^']+)'")
pagination = scrapertools.find_single_match(data, '<div class="older"><a href="([^"]+)"')
if not pagination: if not pagination:
pagination = scrapertools.find_single_match(data, '<span class=\'current\'>\d</span>' pagination = scrapertools.find_single_match(data, '<span class=\'current\'>\d</span>'
'<a class="page larger" href="([^"]+)">') '<a class="page larger" href="([^"]+)">')
patron = '<ul class="sp-grid">(.*?)</ul>' patron = '<ul class="sp-grid">(.*?)</ul>'
data = scrapertools.find_single_match(data, patron) data = scrapertools.find_single_match(data, patron)
matches = re.compile('<a href="([^"]+)">(.*?)</a>.*?<img.*?src="([^"]+)"', re.DOTALL).findall(data) matches = re.compile('<a href="([^"]+)">(.*?)</a>.*?<img.*?src="([^"]+)"', re.DOTALL).findall(data)
for url, title, thumb in matches: for url, title, thumb in matches:
itemlist.append(item.clone(title=title, url=url, action="findvideos", fulltitle=title, thumbnail=thumb)) itemlist.append(item.clone(title=title, url=url, action="findvideos", fulltitle=title, thumbnail=thumb))
if pagination: if pagination:
itemlist.append(item.clone(title=">> Página siguiente", url=pagination)) itemlist.append(item.clone(title=">> Página siguiente", url=pagination))
return itemlist return itemlist
# List the site's categories.
#
# Downloads item.url, flattens the HTML with the same re.sub used elsewhere in
# this file, and keeps only the '<ul class="sub-menu">' that directly follows
# the "Categorías" menu link. Every '<a href="...">text</a>' inside it becomes
# a clone of `item` titled with the link text and pointing at the link url.
#
# Returns the list of category entries (empty when the menu is not found).
#
# NOTE(review): this capture shows the pre- and post-commit text side by side;
# the link extraction call and the action given to each entry
# ("listado" / "videos") are visible in both variants.
def categorias(item): def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data) data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data)
data = scrapertools.find_single_match(data, 'a href="#">Categorías</a><ul class="sub-menu">(.*?)</ul>') data = scrapertools.find_single_match(data, 'a href="#">Categorías</a><ul class="sub-menu">(.*?)</ul>')
matches = re.compile('<a href="([^"]+)">(.*?)</a>', re.DOTALL).findall(data) matches = scrapertools.find_multiple_matches(data, '<a href="([^"]+)">(.*?)</a>')
for url, title in matches: for url, title in matches:
itemlist.append(item.clone(title=title, url=url, action="listado", fulltitle=title)) itemlist.append(item.clone(title=title, url=url, action="videos", fulltitle=title))
return itemlist return itemlist
def search(item, texto): def search(item, texto):
logger.info() logger.info()
texto = texto.replace(" ", "+") texto = texto.replace(" ", "+")
try: try:
item.url = HOST + "?s=%s" % texto item.url = HOST + "?s=%s" % texto
return listado(item) return videos(item)
# Se captura la excepción, para no interrumpir al buscador global si un canal falla # Se captura la excepción, para no interrumpir al buscador global si un canal falla
except: except:
import sys import sys
@@ -125,37 +134,21 @@ def search(item, texto):
# Collect the playable videos embedded in the page at item.url.
#
# The page is downloaded and flattened with the same re.sub used elsewhere in
# this file. Two cases are handled:
#
# - "Cosmos (Carl Sagan)": the page is scanned for pairs of a
#   '<p><strong>' heading and the src of an iframe that follows it, and one
#   entry per pair is produced, titled with the heading text.
# - Any other page: every iframe src is collected, the values are joined with
#   "," and handed to servertools.find_video_items(); item.fulltitle and
#   item.channel are then copied onto the entries of itemlist.
#
# Returns the resulting list of entries.
#
# NOTE(review): this capture shows the pre- and post-commit text side by side.
# One variant tests item.fulltitle, restricts iframes to
# https://www.youtube.com/ urls and resolves each Cosmos chapter through
# servertools.find_video_items(); the other tests item.title, accepts any
# iframe src, emits the Cosmos chapters directly with action "play" and passes
# the final list through servertools.get_servers_itemlist().
def findvideos(item): def findvideos(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data) data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data)
if "Cosmos (Carl Sagan)" in item.title:
if item.fulltitle == "Cosmos (Carl Sagan)": patron = '(?s)<p><strong>([^<]+)<.*?'
patron += '<iframe.*?src="([^"]+)"'
matches = scrapertools.find_multiple_matches(data, matches = scrapertools.find_multiple_matches(data,patron)
'<p><strong>(.*?)</strong><br /><iframe.+?src="(https://www\.youtube\.com/[^?]+)')
for title, url in matches: for title, url in matches:
new_item = item.clone(title=title, url=url) itemlist.append(item.clone(action = "play", title=title, url=url
))
from core import servertools
aux_itemlist = servertools.find_video_items(new_item)
for videoitem in aux_itemlist:
videoitem.title = new_item.title
videoitem.fulltitle = new_item.title
videoitem.channel = item.channel
# videoitem.thumbnail = item.thumbnail
itemlist.extend(aux_itemlist)
else: else:
data = scrapertools.find_multiple_matches(data, '<iframe.+?src="(https://www\.youtube\.com/[^?]+)') data = scrapertools.find_multiple_matches(data, '<iframe.+?src="([^"]+)"')
from core import servertools
itemlist.extend(servertools.find_video_items(data=",".join(data))) itemlist.extend(servertools.find_video_items(data=",".join(data)))
for videoitem in itemlist: for videoitem in itemlist:
videoitem.fulltitle = item.fulltitle videoitem.fulltitle = item.fulltitle
videoitem.channel = item.channel videoitem.channel = item.channel
# videoitem.thumbnail = item.thumbnail itemlist = servertools.get_servers_itemlist(itemlist)
return itemlist return itemlist