# -*- coding: utf-8 -*- import re import urlparse from core import httptools from core import scrapertools from core import servertools from core.item import Item from platformcode import config, logger host = "http://www.peliculashindu.com/" def mainlist(item): logger.info() itemlist = list() #itemlist.append( # Item(channel=item.channel, action="lista", title="Top Películas", url=urlparse.urljoin(host, "top"))) #itemlist.append(Item(channel=item.channel, action="lista", title="Novedades", url=host)) itemlist.append(Item(channel=item.channel, action="explorar", title="Género", url=urlparse.urljoin(host, "genero"))) #itemlist.append(Item(channel=item.channel, action="explorar", title="Listado Alfabético", # url=urlparse.urljoin(host, "alfabetico"))) itemlist.append(Item(channel=item.channel, action="explorar", title="Listado por Año", url=urlparse.urljoin(host, "genero"))) #itemlist.append(Item(channel=item.channel, action="lista", title="Otras Películas (No Bollywood)", # url=urlparse.urljoin(host, "estrenos"))) #itemlist.append(Item(channel=item.channel, title="Buscar", action="search", url=urlparse.urljoin(host, "buscar-"))) return itemlist def explorar(item): logger.info() itemlist = list() urltitle = item.title data = httptools.downloadpage(item.url).data data = re.sub(r"\n|\r|\t|\s{2}| ", "", data) if 'Género' in urltitle: patron = "var accion = '
(.+?)<\/div>'" #if 'Listado Alfabético' in urltitle: # patron = '<\/li><\/ul>.+?

Pel.+?tico<\/h3>(.+?)<\/h3>' if 'Año' in urltitle: patron = "var anho = '
(.+?)<\/div>'" data_explorar = scrapertools.find_single_match(data, patron) patron_explorar = '
  • (.+?)<\/a><\/li>' matches = scrapertools.find_multiple_matches(data_explorar, patron_explorar) for scrapedurl, scrapedtitle in matches: if 'Acci' in scrapedtitle: scrapedtitle = 'Acción' if 'Anima' in scrapedtitle: scrapedtitle = 'Animación' if 'Fanta' in scrapedtitle: scrapedtitle = 'Fantasía' if 'Hist' in scrapedtitle: scrapedtitle = 'Histórico' if 'lico Guerra' in scrapedtitle: scrapedtitle = 'Bélico Guerra' if 'Biogra' in scrapedtitle: scrapedtitle = 'Biografía' if 'Ficcion' in scrapedtitle: scrapedtitle = 'Ciencia Ficción' itemlist.append(item.clone(action='lista', title=scrapedtitle, url=scrapedurl)) return itemlist def search(item, texto): logger.info() texto = texto.replace(" ", "-") item.url = item.url + texto # logger.info("item="+item.url) if texto != '': return lista(item) def lista(item): logger.info() itemlist = [] data = httptools.downloadpage(item.url).data data = re.sub(r"\n|\r|\t|\s{2}| ", "", data) # Eliminamos tabuladores, dobles espacios saltos de linea, etc... data_mov= scrapertools.find_single_match(data,'