# -*- coding: utf-8 -*- import re from core import httptools from core import scrapertools from core.item import Item from platformcode import logger HOST = "http://documentales-online.com/" def mainlist(item): logger.info() itemlist = list() itemlist.append(Item(channel=item.channel, title="Novedades", action="listado", url=HOST)) itemlist.append(Item(channel=item.channel, title="Destacados", action="seccion", url=HOST, extra="destacados")) itemlist.append(Item(channel=item.channel, title="Series Destacadas", action="seccion", url=HOST, extra="series")) # itemlist.append(Item(channel=item.channel, title="Top 100", action="categorias", url=HOST)) # itemlist.append(Item(channel=item.channel, title="Populares", action="categorias", url=HOST)) itemlist.append(Item(channel=item.channel, title="Buscar por:")) itemlist.append(Item(channel=item.channel, title=" Título", action="search")) itemlist.append(Item(channel=item.channel, title=" Categorías", action="categorias", url=HOST)) # itemlist.append(Item(channel=item.channel, title=" Series y Temas", action="categorias", url=HOST)) return itemlist def seccion(item): logger.info() itemlist = [] data = httptools.downloadpage(item.url).data data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data) if item.extra == "destacados": patron_seccion = '

Destacados

' action = "findvideos" else: patron_seccion = '

Series destacadas

' action = "listado" data = scrapertools.find_single_match(data, patron_seccion) matches = re.compile('(.*?)', re.DOTALL).findall(data) aux_action = action for url, title in matches: if item.extra != "destacados" and "Cosmos (Carl Sagan)" in title: action = "findvideos" else: action = aux_action itemlist.append(item.clone(title=title, url=url, action=action, fulltitle=title)) return itemlist def listado(item): logger.info() itemlist = [] data = httptools.downloadpage(item.url).data data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data) pagination = scrapertools.find_single_match(data, '
\d' '') patron = '
    (.*?)
' data = scrapertools.find_single_match(data, patron) matches = re.compile('
(.*?).*?Categorías') matches = re.compile('(.*?)', re.DOTALL).findall(data) for url, title in matches: itemlist.append(item.clone(title=title, url=url, action="listado", fulltitle=title)) return itemlist def search(item, texto): logger.info() texto = texto.replace(" ", "+") try: item.url = HOST + "?s=%s" % texto return listado(item) # Se captura la excepción, para no interrumpir al buscador global si un canal falla except: import sys for line in sys.exc_info(): logger.error("%s" % line) return [] def findvideos(item): logger.info() itemlist = [] data = httptools.downloadpage(item.url).data data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data) if item.fulltitle == "Cosmos (Carl Sagan)": matches = scrapertools.find_multiple_matches(data, '

(.*?)