# -*- coding: utf-8 -*- #------------------------------------------------------------ import urlparse,urllib2,urllib,re import os, sys from platformcode import config, logger from core import scrapertools from core.item import Item from core import servertools from core import httptools host = 'http://www.absoluporn.es' def mainlist(item): logger.info() itemlist = [] itemlist.append( Item(channel=item.channel, title="Nuevos" , action="lista", url=host + "/wall-date-1.html")) itemlist.append( Item(channel=item.channel, title="Mas valorados" , action="lista", url=host + "/wall-note-1.html")) itemlist.append( Item(channel=item.channel, title="Mas vistos" , action="lista", url=host + "/wall-main-1.html")) itemlist.append( Item(channel=item.channel, title="Mas largos" , action="lista", url=host + "/wall-time-1.html")) itemlist.append( Item(channel=item.channel, title="Categorias" , action="categorias", url=host)) itemlist.append( Item(channel=item.channel, title="Buscar", action="search")) return itemlist def search(item, texto): logger.info() texto = texto.replace(" ", "+") item.url = host + "/search-%s-1.html" % texto try: return lista(item) except: import sys for line in sys.exc_info(): logger.error("%s" % line) return [] def categorias(item): logger.info() itemlist = [] data = httptools.downloadpage(item.url).data patron = ' ([^"]+)' matches = re.compile(patron,re.DOTALL).findall(data) for scrapedurl,scrapedtitle in matches: scrapedplot = "" scrapedthumbnail = "" scrapedurl = scrapedurl.replace(".html", "_date.html") scrapedurl = host +"/" + scrapedurl itemlist.append( Item(channel=item.channel, action="lista", title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail , plot=scrapedplot) ) return itemlist def lista(item): logger.info() itemlist = [] data = httptools.downloadpage(item.url).data data = re.sub(r"\n|\r|\t| |
|
", "", data) patron = '
\d+