# -*- coding: utf-8 -*- # ------------------------------------------------------------ # Ringraziamo Icarus crew # Canale hdblog # ------------------------------------------------------------ import re import urlparse from core import httptools, scrapertools from core.item import Item from platformcode import logger from platformcode import config host = "https://www.hdblog.it" def mainlist(item): logger.info("kod.hdblog mainlist") itemlist = [Item(channel=item.channel, title="[COLOR azure]Video recensioni tecnologiche[/COLOR]", action="peliculas", url=host + "/video/", thumbnail="http://www.crat-arct.org/uploads/images/tic%201.jpg"), Item(channel=item.channel, title="[COLOR azure]Categorie[/COLOR]", action="categorias", url=host + "/video/", thumbnail="http://www.crat-arct.org/uploads/images/tic%201.jpg")] return itemlist def categorias(item): logger.info("kod.hdblog categorias") itemlist = [] data = httptools.downloadpage(item.url).data logger.info(data) # Narrow search by selecting only the combo start = data.find('
') end = data.find('
', start) bloque = data[start:end] # The categories are the options for the combo patron = ']+>(.*?)' matches = re.compile(patron, re.DOTALL).findall(bloque) scrapertools.printMatches(matches) for scrapedurl, scrapedtitle in matches: scrapedthumbnail = "" scrapedplot = "" itemlist.append( Item(channel=item.channel, action="peliculas", title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl + "video/", thumbnail=scrapedthumbnail, plot=scrapedplot)) return itemlist def peliculas(item): logger.info("kod.hdblog peliculas") itemlist = [] # Carica la pagina data = httptools.downloadpage(item.url).data # Estrae i contenuti patron = '\s*]+>\s*\s*[^>]+>\s*(.*?)\s*<' matches = re.compile(patron, re.DOTALL).findall(data) scrapertools.printMatches(matches) for scrapedurl, scrapedthumbnail, scrapedtitle in matches: scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle) scrapedplot = "" itemlist.append(Item(channel=item.channel, action="findvideos", fulltitle=scrapedtitle, show=scrapedtitle, title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True)) # Paginazione patronvideos = '[^>]+>[^=]+="next" href="(.*?)" class="inattiva">' matches = re.compile(patronvideos, re.DOTALL).findall(data) scrapertools.printMatches(matches) if len(matches) > 0: scrapedurl = urlparse.urljoin(item.url, matches[0]) itemlist.append( Item(channel=item.channel, action="peliculas", title="[COLOR orange]Avanti >>[/COLOR]", url=scrapedurl, folder=True)) return itemlist