# -*- coding: utf-8 -*-

import re
import urlparse

from core import httptools
from core import scrapertools
from core import servertools
from core.item import Item
from platformcode import config, logger

host = "http://www.peliculashindu.com/"


def mainlist(item):
    """Build the root menu of the channel.

    Args:
        item: The Item that triggered this menu; only ``item.channel`` is
            read, and it is copied onto every entry that is returned.

    Returns:
        A list with the enabled menu entries (browse by genre and browse by
        year), both handled by the ``explorar`` action.
    """
    logger.info()

    # Both enabled entries point at the same "genero" page; explorar() reads
    # item.title (it checks for 'Género') to decide what to extract from it.
    genre_url = urlparse.urljoin(host, "genero")

    # Disabled entries, kept for reference in their original menu order:
    # Item(channel=item.channel, action="lista", title="Top Películas", url=urlparse.urljoin(host, "top"))
    # Item(channel=item.channel, action="lista", title="Novedades", url=host)
    # Item(channel=item.channel, action="explorar", title="Listado Alfabético",
    #      url=urlparse.urljoin(host, "alfabetico"))
    # Item(channel=item.channel, action="lista", title="Otras Películas (No Bollywood)",
    #      url=urlparse.urljoin(host, "estrenos"))
    # Item(channel=item.channel, title="Buscar", action="search", url=urlparse.urljoin(host, "buscar-"))
    return [
        Item(channel=item.channel, action="explorar", title="Género", url=genre_url),
        Item(channel=item.channel, action="explorar", title="Listado por Año", url=genre_url),
    ]


# NOTE(review): the line below is the beginning of explorar() exactly as it
# appears in this copy of the file. The definition is cut off inside a string
# literal and the following lines are disconnected regex fragments, so it is
# left collapsed and inert here rather than reconstructed by guesswork.
# def explorar(item): logger.info() itemlist = list() urltitle = item.title data = httptools.downloadpage(item.url).data data = re.sub(r"\n|\r|\t|\s{2}| ", "", data) if 'Género' in urltitle: patron = "var accion = '
(.+?)<\/p>.+?" #scrapedplot patron += "
(.+?)" #scrapedyear patron += "<\/p>.+?