# -*- coding: utf-8 -*-
import re
from core import httptools
from core import scrapertools
from core.item import Item
from platformcode import logger
HOST = "http://documentales-online.com/"
def mainlist(item):
    """Build the root menu of the channel.

    Every entry is an ``Item`` bound to ``item.channel``.  The entries that
    navigate the site start from ``HOST``; the "Buscar por:" entry carries no
    action and only serves as a heading for the search options below it.

    Returns:
        list: the menu entries, in display order.
    """
    logger.info()
    channel = item.channel

    return [
        Item(channel=channel, title="Novedades", action="listado", url=HOST),
        Item(channel=channel, title="Destacados", action="seccion", url=HOST, extra="destacados"),
        Item(channel=channel, title="Series Destacadas", action="seccion", url=HOST, extra="series"),
        # Disabled entries, kept for reference:
        # Item(channel=channel, title="Top 100", action="categorias", url=HOST),
        # Item(channel=channel, title="Populares", action="categorias", url=HOST),
        Item(channel=channel, title="Buscar por:"),
        Item(channel=channel, title=" Título", action="search"),
        Item(channel=channel, title=" Categorías", action="categorias", url=HOST),
        # Item(channel=channel, title=" Series y Temas", action="categorias", url=HOST),
    ]
def seccion(item):
# List the entries of one section of the page at item.url.
# item.extra selects the section: "destacados" uses the first pattern and
# sends every entry to "findvideos"; any other value uses the second pattern
# and sends entries to "listado".
# Returns a list of clones of `item`, one per (url, title) pair matched.
#
# NOTE(review): the three pattern literals below are split across two lines
# and appear to have lost their HTML content; as written they are not valid
# single-quoted strings. Restore them from the upstream file before use.
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
# Remove newlines, carriage returns, tabs, pairs of whitespace characters and
# "-<whitespace>" sequences before applying the patterns.
data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data)
if item.extra == "destacados":
patron_seccion = '
'
action = "findvideos"
else:
patron_seccion = '
'
action = "listado"
# Narrow `data` down to the fragment matched by the section pattern.
data = scrapertools.find_single_match(data, patron_seccion)
matches = re.compile('
(.*?)', re.DOTALL).findall(data)
# Remember the section's default action; the loop may override it per entry.
aux_action = action
for url, title in matches:
# Outside "destacados", an entry whose title contains "Cosmos (Carl Sagan)"
# is opened with "findvideos" instead of the section's default action.
if item.extra != "destacados" and "Cosmos (Carl Sagan)" in title:
action = "findvideos"
else:
action = aux_action
itemlist.append(item.clone(title=title, url=url, action=action, fulltitle=title))
return itemlist
def listado(item):
# Download item.url, look up a pagination link and build the result list.
#
# NOTE(review): this function is incomplete as it stands. The pattern
# literals are truncated or split across lines, and the code between the
# `re.compile(` call and the "Página siguiente" entry (presumably the loop
# that fills `itemlist` and the start of the next-page item) is missing.
# Restore it from the upstream file before relying on any of it.
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
# Remove newlines, carriage returns, tabs, pairs of whitespace characters and
# "-<whitespace>" sequences before applying the patterns.
data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data)
# `pagination` is later passed as the url of the "Página siguiente" entry.
pagination = scrapertools.find_single_match(data, '
\d'
'')
patron = ''
data = scrapertools.find_single_match(data, patron)
matches = re.compile('(.*?).*?
> Página siguiente", url=pagination))
return itemlist
def categorias(item):
    """List the categories found on the page at ``item.url``.

    The page is downloaded, whitespace-normalised, reduced to the fragment
    matched by the "Categorías" pattern, and every (url, title) pair found in
    that fragment becomes a clone of ``item`` whose action is ``"listado"``.

    NOTE(review): both patterns look truncated. The link pattern has a single
    capture group while the loop unpacks two values (url, title) - confirm the
    intended expressions against the upstream file.

    Returns:
        list: one cloned item per category link.
    """
    logger.info()

    page = httptools.downloadpage(item.url).data
    # Drop newlines, carriage returns, tabs, whitespace pairs and "-<space>".
    page = re.sub(r"\n|\r|\t|\s{2}|-\s", "", page)

    menu_html = scrapertools.find_single_match(page, 'a href="#">Categorías')
    link_re = re.compile('(.*?)', re.DOTALL)

    return [
        item.clone(title=title, url=url, action="listado", fulltitle=title)
        for url, title in link_re.findall(menu_html)
    ]
def search(item, texto):
    """Search the site by title and return the matching entries.

    Args:
        item: menu item of this channel; its ``url`` is overwritten with the
            search URL built from ``HOST``.
        texto: text to search for; spaces are replaced with ``+`` before it
            is placed in the ``s`` query parameter.

    Returns:
        list: whatever ``listado`` returns for the search URL, or an empty
        list if building the URL or listing the results raises.
    """
    logger.info()
    texto = texto.replace(" ", "+")
    try:
        item.url = HOST + "?s=%s" % texto
        return listado(item)
    # The exception is caught so that the global search is not interrupted
    # when a single channel fails.  ``Exception`` is used instead of a bare
    # ``except`` so that SystemExit and KeyboardInterrupt still propagate.
    except Exception:
        import sys
        # Log the exception type, value and traceback object, one per line.
        for line in sys.exc_info():
            logger.error("%s" % line)
        return []
def findvideos(item):
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|-\s", "", data)
if item.fulltitle == "Cosmos (Carl Sagan)":
matches = scrapertools.find_multiple_matches(data,
'(.*?)