# -*- coding: utf-8 -*-

import re
import urlparse

from core import scrapertools
from core.item import Item
from platformcode import logger

# Base URL that the relative paths used throughout this channel are joined onto.
host = "http://doramastv.com/"

# Request headers, stored as [name, value] pairs. Each function takes a shallow
# copy (DEFAULT_HEADERS[:]) and passes it as headers= to scrapertools.cache_page.
DEFAULT_HEADERS = []
DEFAULT_HEADERS.append(
    ["User-Agent", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12"])


def mainlist(item):
    """Build the top-level menu of the channel.

    Returns a list of five Item objects (on-air dramas, alphabetical listing,
    genres, latest additions, search). Every entry reuses ``item.channel`` and
    points at a URL obtained by joining a fixed path onto ``host``.
    """
    logger.info()

    itemlist = list([])
    itemlist.append(
        Item(channel=item.channel, action="pagina_", title="En emision",
             url=urlparse.urljoin(host, "drama/emision")))
    itemlist.append(Item(channel=item.channel, action="letras", title="Listado alfabetico",
                         url=urlparse.urljoin(host, "lista-numeros")))
    # NOTE(review): the "generos" and "search" actions are referenced here;
    # confirm that matching handlers are defined in this module.
    itemlist.append(
        Item(channel=item.channel, action="generos", title="Generos",
             url=urlparse.urljoin(host, "genero/accion")))
    itemlist.append(Item(channel=item.channel, action="pagina_", title="Ultimos agregados",
                         url=urlparse.urljoin(host, "dramas/ultimos")))
    itemlist.append(Item(channel=item.channel, action="search", title="Buscar",
                         url=urlparse.urljoin(host, "buscar/anime/ajax/?title=")))

    return itemlist


def letras(item):
    """List the entries scraped from the page at ``item.url``.

    Downloads ``item.url`` with the default headers and, for every regex match,
    appends an Item with action "pagina_" whose title is the entity-unescaped
    match text and whose URL is the matched path joined onto ``host``.

    Returns the list of Items built.
    """
    logger.info()

    itemlist = []
    headers = DEFAULT_HEADERS[:]
    data = scrapertools.cache_page(item.url, headers=headers)

    # NOTE(review): this pattern has a single capture group, yet the loop below
    # unpacks two values (url, title) per match. The literal looks like it has
    # lost its HTML markup -- confirm against the upstream channel file.
    patron = ' (.+?)<'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle in matches:
        title = scrapertools.entityunescape(scrapedtitle)
        url = urlparse.urljoin(host, scrapedurl)
        thumbnail = ""
        plot = ""
        logger.debug("title=[{0}], url=[{1}], thumbnail=[{2}]".format(title, url, thumbnail))

        itemlist.append(
            Item(channel=item.channel, action="pagina_", title=title, url=url,
                 thumbnail=thumbnail, plot=plot))

    return itemlist


def pagina_(item):
    """List the dramas found on one listing page, plus a next-page entry.

    Downloads ``item.url``, isolates a section of the page with
    ``scrapertools.get_match``, strips CR/LF from it and turns each regex match
    into an Item with action "episodios" (URL, plot, thumbnail and title come
    from the four capture groups). Afterwards the full page is searched for
    links carrying ``class="next"``; each one adds a "Pagina Siguiente >>" Item
    that calls this function again.

    Returns the list of Items built.
    """
    logger.info()

    itemlist = []
    headers = DEFAULT_HEADERS[:]
    data = scrapertools.cache_page(item.url, headers=headers)

    # NOTE(review): the pattern literal below contains a raw line break, which a
    # single-quoted string cannot hold. It looks like the delimiting markup is
    # missing from it -- restore the original pattern from the upstream file.
    data1 = scrapertools.get_match(data, '
(.+?)')
    data1 = data1.replace('\n', '')
    data1 = data1.replace('\r', '')

    # Capture groups, in order: drama URL, plot text, thumbnail src, title.
    patron = 'href="(\/drama.+?)".+?<\/div>(.+?)<\/div>.+?src="(.+?)".+?titulo">(.+?)<'
    matches = re.compile(patron, re.DOTALL).findall(data1)

    for scrapedurl, scrapedplot, scrapedthumbnail, scrapedtitle in matches:
        title = scrapertools.unescape(scrapedtitle).strip()
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(host, scrapedthumbnail)
        plot = scrapertools.decodeHtmlentities(scrapedplot)

        itemlist.append(
            Item(channel=item.channel, action="episodios", title=title, url=url,
                 thumbnail=thumbnail, plot=plot, show=title))

    # Pagination: searched in the full page (data), not in the isolated section.
    patron = 'href="([^"]+)" class="next"'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for match in matches:
        # This guard is always true here: the loop body only runs when
        # matches has at least one element.
        if len(matches) > 0:
            scrapedurl = urlparse.urljoin(item.url, match)
            scrapedtitle = "Pagina Siguiente >>"
            scrapedthumbnail = ""
            scrapedplot = ""
            itemlist.append(Item(channel=item.channel, action="pagina_", title=scrapedtitle,
                                 url=scrapedurl, thumbnail=scrapedthumbnail,
                                 plot=scrapedplot, folder=True))

    return itemlist


def episodios(item):
    """List the episodes of the drama whose page is ``item.url``.

    Downloads the page, strips CR/LF, isolates a section with
    ``scrapertools.get_match`` and turns each regex match into an Item with
    action "findvideos". The title is the HTML-cleaned match text, the URL is
    joined onto ``item.url`` and ``show`` is copied from ``item.show``.

    Returns the list of Items built.
    """
    logger.info()

    itemlist = []
    headers = DEFAULT_HEADERS[:]
    data = scrapertools.cache_page(item.url, headers=headers)
    data = data.replace('\n', '')
    data = data.replace('\r', '')

    # NOTE(review): the get_match pattern is an empty string and the pattern
    # after it has one capture group, while the loop unpacks two values per
    # match. Both literals look like they lost their HTML markup -- confirm
    # against the upstream channel file.
    data1 = scrapertools.get_match(data, '')
    patron = '(.+?)<'
    matches = re.compile(patron, re.DOTALL).findall(data1)

    for scrapedurl, scrapedtitle in matches:
        title = scrapertools.htmlclean(scrapedtitle).strip()
        thumbnail = ""
        plot = ""
        url = urlparse.urljoin(item.url, scrapedurl)
        show = item.show

        itemlist.append(
            Item(channel=item.channel, action="findvideos", title=title, url=url,
                 thumbnail=thumbnail, plot=plot, fulltitle=title, show=show))

    return itemlist


def findvideos(item):
    logger.info()

    headers = DEFAULT_HEADERS[:]
    data = scrapertools.cache_page(item.url, headers=headers)
    data = data.replace('\n', '')
    data = data.replace('\r', '')
    patron = '