Corrección scrapertools.cachepage y scrapertools.get_match

This commit is contained in:
paezner
2019-04-03 17:19:52 +02:00
parent 867a14fe23
commit 550e67da94
33 changed files with 76 additions and 112 deletions
+3 -19
View File
@@ -14,6 +14,7 @@ host = 'http://www.alsoporn.com'
def mainlist(item): def mainlist(item):
logger.info() logger.info()
itemlist = [] itemlist = []
# itemlist.append( Item(channel=item.channel, title="Nuevos" , action="lista", url=host + "/en/g/All/new/1"))
itemlist.append( Item(channel=item.channel, title="Top" , action="lista", url=host + "/g/All/top/1")) itemlist.append( Item(channel=item.channel, title="Top" , action="lista", url=host + "/g/All/top/1"))
itemlist.append( Item(channel=item.channel, title="Categorias" , action="categorias", url=host)) itemlist.append( Item(channel=item.channel, title="Categorias" , action="categorias", url=host))
itemlist.append( Item(channel=item.channel, title="Buscar", action="search")) itemlist.append( Item(channel=item.channel, title="Buscar", action="search"))
@@ -33,23 +34,6 @@ def search(item, texto):
return [] return []
def catalogo(item):
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<h3>CLIPS</h3>(.*?)<h3>FILM</h3>')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<li><a href="([^"]+)" title="">.*?'
patron += '<span class="videos-count">([^"]+)</span><span class="title">([^"]+)</span>'
matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,cantidad,scrapedtitle in matches:
scrapedplot = ""
scrapedthumbnail = ""
itemlist.append( Item(channel=item.channel, action="lista", title=scrapedtitle, url=scrapedurl,
thumbnail=scrapedthumbnail, plot=scrapedplot) )
return itemlist
def categorias(item): def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
@@ -96,10 +80,10 @@ def play(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
scrapedurl = scrapertools.find_single_match(data,'<iframe frameborder=0 scrolling="no" src=\'([^\']+)\'') scrapedurl = scrapertools.find_single_match(data,'<iframe frameborder=0 scrolling="no" src=\'([^\']+)\'')
data = scrapertools.cachePage(scrapedurl) data = httptools.downloadpage(item.url).data
scrapedurl1 = scrapertools.find_single_match(data,'<iframe src="(.*?)"') scrapedurl1 = scrapertools.find_single_match(data,'<iframe src="(.*?)"')
scrapedurl1 = scrapedurl1.replace("//www.playercdn.com/ec/i2.php?", "https://www.trinitytube.xyz/ec/i2.php?") scrapedurl1 = scrapedurl1.replace("//www.playercdn.com/ec/i2.php?", "https://www.trinitytube.xyz/ec/i2.php?")
data = scrapertools.cachePage(scrapedurl1) data = httptools.downloadpage(item.url).data
scrapedurl2 = scrapertools.find_single_match(data,'<source src="(.*?)"') scrapedurl2 = scrapertools.find_single_match(data,'<source src="(.*?)"')
itemlist.append(item.clone(action="play", title=item.title, fulltitle = item.title, url=scrapedurl2)) itemlist.append(item.clone(action="play", title=item.title, fulltitle = item.title, url=scrapedurl2))
return itemlist return itemlist
+1 -1
View File
@@ -41,7 +41,7 @@ def catalogo(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<strong class="popup-title">Canales</strong>(.*?)<strong>Models</strong>') data = scrapertools.find_single_match(data,'<strong class="popup-title">Canales</strong>(.*?)<strong>Models</strong>')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<li><a class="item" href="([^"]+)" title="([^"]+)">' patron = '<li><a class="item" href="([^"]+)" title="([^"]+)">'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
+1 -1
View File
@@ -78,7 +78,7 @@ def videos(item):
folder=True, contentType="movie")) folder=True, contentType="movie"))
# Paginador # Paginador
Actual = int(scrapertools.get_match(item.url, url_api + '/index/[^/]+/([0-9]+)/pc')) Actual = int(scrapertools.find_single_match(item.url, url_api + '/index/[^/]+/([0-9]+)/pc'))
if JSONData["pages"] - 1 > Actual: if JSONData["pages"] - 1 > Actual:
scrapedurl = item.url.replace("/" + str(Actual) + "/", "/" + str(Actual + 1) + "/") scrapedurl = item.url.replace("/" + str(Actual) + "/", "/" + str(Actual + 1) + "/")
itemlist.append( itemlist.append(
+10 -6
View File
@@ -14,6 +14,7 @@ host = 'https://www.cine-online.eu'
IDIOMAS = {'Español': 'ESP', 'Cast': 'ESP', 'Latino': 'LAT', 'Lat': 'LAT', 'Subtitulado': 'VOSE', 'Sub': 'VOSE'} IDIOMAS = {'Español': 'ESP', 'Cast': 'ESP', 'Latino': 'LAT', 'Lat': 'LAT', 'Subtitulado': 'VOSE', 'Sub': 'VOSE'}
list_language = IDIOMAS.values() list_language = IDIOMAS.values()
list_servers = ['Streamango', 'Vidoza', 'Openload', 'Streamcherry', 'Netutv'] list_servers = ['Streamango', 'Vidoza', 'Openload', 'Streamcherry', 'Netutv']
# list_quality = ['Brscreener', 'HD', 'TS']
list_quality = [] list_quality = []
__channel__='cineonline' __channel__='cineonline'
__comprueba_enlaces__ = config.get_setting('comprueba_enlaces', __channel__) __comprueba_enlaces__ = config.get_setting('comprueba_enlaces', __channel__)
@@ -75,7 +76,7 @@ def categorias(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
if "Año" in item.title: if "Año" in item.title:
data = scrapertools.get_match(data,'<h3>Año de estreno(.*?)</ul>') data = scrapertools.find_single_match(data,'<h3>Año de estreno(.*?)</ul>')
patron = '<li><a href="([^"]+)">(\d+)</(\w)>' patron = '<li><a href="([^"]+)">(\d+)</(\w)>'
else: else:
patron = '<li class="cat-item cat-item-\d+"><a href="([^"]+)">([^"]+)</a> <span>(\d+)</span>' patron = '<li class="cat-item cat-item-\d+"><a href="([^"]+)">([^"]+)</a> <span>(\d+)</span>'
@@ -89,6 +90,9 @@ def categorias(item):
return itemlist return itemlist
def lista(item): def lista(item):
logger.info() logger.info()
itemlist = [] itemlist = []
@@ -200,11 +204,11 @@ def findvideos(item):
itemlist = filtertools.get_links(itemlist, item, list_language) itemlist = filtertools.get_links(itemlist, item, list_language)
# Requerido para AutoPlay # Requerido para AutoPlay
autoplay.start(itemlist, item) autoplay.start(itemlist, item)
if not "/episodios/" in item.url:
if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra !='findvideos': if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra !='findvideos' and not "/episodios/" in item.url :
itemlist.append(Item(channel=item.channel, action="add_pelicula_to_library", itemlist.append(Item(channel=item.channel, action="add_pelicula_to_library",
title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]', url=item.url, title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]', url=item.url,
extra="findvideos", contentTitle=item.contentTitle)) extra="findvideos", contentTitle=item.contentTitle))
return itemlist return itemlist
+1 -1
View File
@@ -37,7 +37,7 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<div class="category">(.*?)</ul>') data = scrapertools.find_single_match(data,'<div class="category">(.*?)</ul>')
patron = '<li><a href="(.*?)".*?>(.*?)</a></li>' patron = '<li><a href="(.*?)".*?>(.*?)</a></li>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
scrapertools.printMatches(matches) scrapertools.printMatches(matches)
+1 -1
View File
@@ -37,7 +37,7 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<h2>TAGS</h2>(.*?)<div class="sideitem"') data = scrapertools.find_single_match(data,'<h2>TAGS</h2>(.*?)<div class="sideitem"')
patron = '<a href="(.*?)".*?>(.*?)</a>' patron = '<a href="(.*?)".*?>(.*?)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,scrapedtitle in matches: for scrapedurl,scrapedtitle in matches:
+1 -1
View File
@@ -15,7 +15,7 @@ def mainlist(item):
if item.url=="": if item.url=="":
item.url = "http://www.filmovix.net/videoscategory/porno/" item.url = "http://www.filmovix.net/videoscategory/porno/"
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<h1 class="cat_head">XXX</h1>(.*?)<h3> Novo dodato </h3>') data = scrapertools.find_single_match(data,'<h1 class="cat_head">XXX</h1>(.*?)<h3> Novo dodato </h3>')
patron = '<li class="clearfix">.*?' patron = '<li class="clearfix">.*?'
patron += 'src="([^"]+)".*?' patron += 'src="([^"]+)".*?'
patron += '<p class="title"><a href="([^"]+)" rel="bookmark" title="([^"]+)">' patron += '<p class="title"><a href="([^"]+)" rel="bookmark" title="([^"]+)">'
+2 -1
View File
@@ -80,7 +80,8 @@ def lista(item):
def play(item): def play(item):
logger.info() logger.info()
itemlist = [] itemlist = []
url = scrapertools.find_single_match(scrapertools.cachePage(item.url),'<iframe src="([^"]+)"') data = httptools.downloadpage(item.url).data
url = scrapertools.find_single_match(data,'<iframe src="([^"]+)"')
data = httptools.downloadpage(url).data data = httptools.downloadpage(url).data
patron = 'html5player.setVideoHLS\\(\'([^\']+)\'' patron = 'html5player.setVideoHLS\\(\'([^\']+)\''
matches = scrapertools.find_multiple_matches(data, patron) matches = scrapertools.find_multiple_matches(data, patron)
@@ -39,7 +39,7 @@ def catalogo(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'>Top Sites</a>(.*?)</aside>') data = scrapertools.find_single_match(data,'>Top Sites</a>(.*?)</aside>')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<li id="menu-item-\d+".*?<a href="([^"]+)">([^"]+)</a></li>' patron = '<li id="menu-item-\d+".*?<a href="([^"]+)">([^"]+)</a></li>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
@@ -54,7 +54,7 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'Top Tags(.*?)</ul>') data = scrapertools.find_single_match(data,'Top Tags(.*?)</ul>')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<a href="([^"]+)">(.*?)</a>' patron = '<a href="([^"]+)">(.*?)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
+1 -1
View File
@@ -68,7 +68,7 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<a>CATEGORÍAS</a>(.*?)</ul>') data = scrapertools.find_single_match(data,'<a>CATEGORÍAS</a>(.*?)</ul>')
patron = '<a href="([^"]+)">([^"]+)</a>' patron = '<a href="([^"]+)">([^"]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,scrapedtitle in matches: for scrapedurl,scrapedtitle in matches:
+2 -2
View File
@@ -37,7 +37,7 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<ul class="cf">(.*?)</ul>') data = scrapertools.find_single_match(data,'<ul class="cf">(.*?)</ul>')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<li>.*?<a href="([^"]+)".*?' patron = '<li>.*?<a href="([^"]+)".*?'
patron += '<img class="thumb" src="([^"]+)" alt="([^"]+)".*?' patron += '<img class="thumb" src="([^"]+)" alt="([^"]+)".*?'
@@ -57,7 +57,7 @@ def lista(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<ul class="cf">(.*?)<h2>Advertisement</h2>') data = scrapertools.find_single_match(data,'<ul class="cf">(.*?)<h2>Advertisement</h2>')
patron = '<li>.*?<a href="([^"]+)".*?' patron = '<li>.*?<a href="([^"]+)".*?'
patron += 'src="([^"]+)" alt="([^"]+)".*?' patron += 'src="([^"]+)" alt="([^"]+)".*?'
patron += '<span class="time">(.*?)</span>' patron += '<span class="time">(.*?)</span>'
+2 -2
View File
@@ -42,9 +42,9 @@ def categorias(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
if item.title == "Categorias": if item.title == "Categorias":
data = scrapertools.get_match(data, '<a href="#">Genres</a>(.*?)</ul>') data = scrapertools.find_single_match(data, '<a href="#">Genres</a>(.*?)</ul>')
else: else:
data = scrapertools.get_match(data, '<a href="#">Studios</a>(.*?)</ul>') data = scrapertools.find_single_match(data, '<a href="#">Studios</a>(.*?)</ul>')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<a href="([^"]+)">([^<]+)</a>' patron = '<a href="([^"]+)">([^<]+)</a>'
matches = re.compile(patron, re.DOTALL).findall(data) matches = re.compile(patron, re.DOTALL).findall(data)
+2 -2
View File
@@ -42,9 +42,9 @@ def categorias(item):
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
if "/category/movies/" in item.url: if "/category/movies/" in item.url:
data = scrapertools.get_match(data,'>Movies</a>(.*?)</ul>') data = scrapertools.find_single_match(data,'>Movies</a>(.*?)</ul>')
else: else:
data = scrapertools.get_match(data,'>Clips</a>(.*?)</ul>') data = scrapertools.find_single_match(data,'>Clips</a>(.*?)</ul>')
patron = '<a href=([^"]+)>([^"]+)</a>' patron = '<a href=([^"]+)>([^"]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
scrapertools.printMatches(matches) scrapertools.printMatches(matches)
+2 -2
View File
@@ -41,9 +41,9 @@ def categorias(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
if item.title == "Categorias" : if item.title == "Categorias" :
data = scrapertools.get_match(data,'Categories(.*?)Channels') data = scrapertools.find_single_match(data,'Categories(.*?)Channels')
else: else:
data = scrapertools.get_match(data,'Channels(.*?)</ul>') data = scrapertools.find_single_match(data,'Channels(.*?)</ul>')
patron = '<li><a href="([^"]+)" title="[^"]+">(.*?)</a>' patron = '<li><a href="([^"]+)" title="[^"]+">(.*?)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
scrapertools.printMatches(matches) scrapertools.printMatches(matches)
+2 -2
View File
@@ -54,7 +54,7 @@ def lista(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<div class="videos-list">(.*?)<div class="videos-list">') data = scrapertools.find_single_match(data,'<div class="videos-list">(.*?)<div class="videos-list">')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<article id="post-\d+".*?' patron = '<article id="post-\d+".*?'
patron += '<a href="([^"]+)" title="([^"]+)">.*?' patron += '<a href="([^"]+)" title="([^"]+)">.*?'
@@ -82,7 +82,7 @@ def play(item):
url = scrapertools.find_single_match(data,'<meta itemprop="embedURL" content="([^"]+)"') url = scrapertools.find_single_match(data,'<meta itemprop="embedURL" content="([^"]+)"')
url = url.replace("pornhub.com/embed/", "pornhub.com/view_video.php?viewkey=") url = url.replace("pornhub.com/embed/", "pornhub.com/view_video.php?viewkey=")
data = httptools.downloadpage(url).data data = httptools.downloadpage(url).data
# data = scrapertools.cachePage(url) https://www.spankwire.com/EmbedPlayer.aspx?ArticleId=14049072 # https://www.spankwire.com/EmbedPlayer.aspx?ArticleId=14049072
if "xvideos" in url : if "xvideos" in url :
scrapedurl = scrapertools.find_single_match(data,'setVideoHLS\(\'([^\']+)\'') scrapedurl = scrapertools.find_single_match(data,'setVideoHLS\(\'([^\']+)\'')
if "pornhub" in url : if "pornhub" in url :
+2 -2
View File
@@ -39,7 +39,7 @@ def canales(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(host).data data = httptools.downloadpage(host).data
data = scrapertools.get_match(data, 'Top Networks</a>(.*?)</ul>') data = scrapertools.find_single_match(data, 'Top Networks</a>(.*?)</ul>')
patron = '<li id=.*?<a href="(.*?)">(.*?)</a></li>' patron = '<li id=.*?<a href="(.*?)">(.*?)</a></li>'
matches = re.compile(patron, re.DOTALL).findall(data) matches = re.compile(patron, re.DOTALL).findall(data)
for scrapedurl, scrapedtitle in matches: for scrapedurl, scrapedtitle in matches:
@@ -56,7 +56,7 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data, 'More Categories</a>(.*?)</ul>') data = scrapertools.find_single_match(data, 'More Categories</a>(.*?)</ul>')
patron = '<li id=.*?<a href="(.*?)">(.*?)</a></li>' patron = '<li id=.*?<a href="(.*?)">(.*?)</a></li>'
matches = re.compile(patron, re.DOTALL).findall(data) matches = re.compile(patron, re.DOTALL).findall(data)
for scrapedurl, scrapedtitle in matches: for scrapedurl, scrapedtitle in matches:
+2 -2
View File
@@ -42,9 +42,9 @@ def categorias(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
if item.title == "Canal" : if item.title == "Canal" :
data = scrapertools.get_match(data,'>Adult Porn Parodies</a></li>(.*?)</ul>') data = scrapertools.find_single_match(data,'>Adult Porn Parodies</a></li>(.*?)</ul>')
else: else:
data = scrapertools.get_match(data,'<div class="nav-wrap">(.*?)<ul class="sub-menu">') data = scrapertools.find_single_match(data,'<div class="nav-wrap">(.*?)<ul class="sub-menu">')
itemlist.append( Item(channel=item.channel, action="lista", title="Big tit", url="https://sexofilm.com/?s=big+tits")) itemlist.append( Item(channel=item.channel, action="lista", title="Big tit", url="https://sexofilm.com/?s=big+tits"))
patron = '<a href="([^<]+)">([^<]+)</a>' patron = '<a href="([^<]+)">([^<]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
+1 -1
View File
@@ -42,7 +42,7 @@ def categorias(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|&nbsp;|<br>|<br/>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>|<br/>", "", data)
data = scrapertools.get_match(data,'<div id="content">(.*?)<div class="maincat">') data = scrapertools.find_single_match(data,'<div id="content">(.*?)<div class="maincat">')
patron = '<a href="(.*?)".*?' patron = '<a href="(.*?)".*?'
patron += '<img src="(.*?)".*?alt="(.*?)"' patron += '<img src="(.*?)".*?alt="(.*?)"'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
+2 -2
View File
@@ -51,7 +51,7 @@ def categorias(item):
scrapedtitle = scrapedtitle + " (" + cantidad +")" scrapedtitle = scrapedtitle + " (" + cantidad +")"
scrapedurl = urlparse.urljoin(item.url,scrapedurl) + "/Submitted/59" scrapedurl = urlparse.urljoin(item.url,scrapedurl) + "/Submitted/59"
itemlist.append( Item(channel=item.channel, action="lista", title=scrapedtitle, url=scrapedurl, itemlist.append( Item(channel=item.channel, action="lista", title=scrapedtitle, url=scrapedurl,
thumbnail=scrapedthumbnail, fanart=scrapedthumbnail, plot=scrapedplot) ) fanart=scrapedthumbnail, thumbnail=scrapedthumbnail, plot=scrapedplot) )
return itemlist return itemlist
@@ -88,7 +88,7 @@ def play(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'Copy Embed Code(.*?)For Desktop') data = scrapertools.find_single_match(data,'Copy Embed Code(.*?)For Desktop')
patron = '<div class="shareDownload_container__item__dropdown">.*?<a href="([^"]+)"' patron = '<div class="shareDownload_container__item__dropdown">.*?<a href="([^"]+)"'
matches = scrapertools.find_multiple_matches(data, patron) matches = scrapertools.find_multiple_matches(data, patron)
for scrapedurl in matches: for scrapedurl in matches:
+2 -2
View File
@@ -38,7 +38,7 @@ def catalogo(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'PaySites(.*?)<li id="menu-item-28040"') data = scrapertools.find_single_match(data,'PaySites(.*?)<li id="menu-item-28040"')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<li id="menu-item-\d+".*?<a href="([^"]+)">([^"]+)</a>' patron = '<li id="menu-item-\d+".*?<a href="([^"]+)">([^"]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
@@ -55,7 +55,7 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<a href="#">Categories</a>(.*?)</ul>') data = scrapertools.find_single_match(data,'<a href="#">Categories</a>(.*?)</ul>')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<li id="menu-item-\d+".*?<a href="([^"]+)">([^"]+)</a>' patron = '<li id="menu-item-\d+".*?<a href="([^"]+)">([^"]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
+7 -6
View File
@@ -40,16 +40,17 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<div class="category-item">(.*?)<div id="goupBlock"')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<a href="([^"]+)">\s*(.*?)\s*<' patron = '<div class="thumb-container with-title moviec">.*?'
patron += '<a href="([^"]+)".*?'
patron += 'src="([^"]+)".*?'
patron += '<a title="([^"]+)".*?'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,scrapedtitle in matches: for scrapedurl,scrapedthumbnail,scrapedtitle in matches:
scrapedplot = "" scrapedplot = ""
scrapedthumbnail = ""
scrapedurl = scrapedurl + "/most-recent/" scrapedurl = scrapedurl + "/most-recent/"
itemlist.append( Item(channel=item.channel, action="lista", title=scrapedtitle, url=scrapedurl, itemlist.append( Item(channel=item.channel, action="lista", title=scrapedtitle, url=scrapedurl,
thumbnail=scrapedthumbnail, plot=scrapedplot) ) fanart=scrapedthumbnail, thumbnail=scrapedthumbnail, plot=scrapedplot) )
return itemlist return itemlist
@@ -80,7 +81,7 @@ def lista(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
data = scrapertools.get_match(data,'class="thumbs-container">(.*?)<div class="clearfix">') data = scrapertools.find_single_match(data,'class="thumbs-container">(.*?)<div class="clearfix">')
patron = '<p class="btime">([^"]+)</p>.*?' patron = '<p class="btime">([^"]+)</p>.*?'
patron += '>(.*?)<img width=.*?' patron += '>(.*?)<img width=.*?'
patron += '="([^"]+)" class="thumb.*?' patron += '="([^"]+)" class="thumb.*?'
+1 -1
View File
@@ -23,7 +23,7 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'<h3>Categories</h3>(.*?)</ul>') data = scrapertools.find_single_match(data,'<h3>Categories</h3>(.*?)</ul>')
patron = '<li class="cat-item cat-item-\d+"><a href="(.*?)" >(.*?)</a>' patron = '<li class="cat-item cat-item-\d+"><a href="(.*?)" >(.*?)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,scrapedtitle in matches: for scrapedurl,scrapedtitle in matches:
+1 -1
View File
@@ -62,7 +62,7 @@ def categorias(item):
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
if item.title=="PornStars" : if item.title=="PornStars" :
data = scrapertools.get_match(data,'</i> Hall Of Fame Pornstars</h1>(.*?)</section>') data = scrapertools.find_single_match(data,'</i> Hall Of Fame Pornstars</h1>(.*?)</section>')
patron = '<a class="thumb" href="([^"]+)">.*?<img src="([^"]+)".*?<div class="vidcountSp">(.*?)</div>.*?<a class="categoryTitle".*?>([^"]+)</a>' patron = '<a class="thumb" href="([^"]+)">.*?<img src="([^"]+)".*?<div class="vidcountSp">(.*?)</div>.*?<a class="categoryTitle".*?>([^"]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,scrapedthumbnail,cantidad,scrapedtitle in matches: for scrapedurl,scrapedthumbnail,cantidad,scrapedtitle in matches:
+1 -1
View File
@@ -81,7 +81,7 @@ def play(item):
# http://tubehentai.com/media/thumbs/5/2/3/9/c/5239cf74632cbTHLaBlueGirlep3%20%20Segment2000855.000001355.000.mp4 # http://tubehentai.com/media/thumbs/5/2/3/9/c/5239cf74632cbTHLaBlueGirlep3%20%20Segment2000855.000001355.000.mp4
# http://tubehentai.com/media/videos/5/2/3/9/c/5239cf74632cbTHLaBlueGirlep3%20%20Segment2000855.000001355.000.mp4?start=0 # http://tubehentai.com/media/videos/5/2/3/9/c/5239cf74632cbTHLaBlueGirlep3%20%20Segment2000855.000001355.000.mp4?start=0
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
url = scrapertools.get_match(data, 's1.addParam\("flashvars","bufferlength=1&autostart=true&overlay=(.*?\.mp4)') url = scrapertools.find_single_match(data, 's1.addParam\("flashvars","bufferlength=1&autostart=true&overlay=(.*?\.mp4)')
url = url.replace("/thumbs", "/videos") url = url.replace("/thumbs", "/videos")
# url = url+"?start=0" # url = url+"?start=0"
logger.info("url=" + url) logger.info("url=" + url)
+1 -1
View File
@@ -69,7 +69,7 @@ def categorias(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
data = scrapertools.get_match(data,'<div class="cats-all categories-list">(.*?)</div>') data = scrapertools.find_single_match(data,'<div class="cats-all categories-list">(.*?)</div>')
patron = '<a href="([^"]+)".*?>([^"]+)</a>' patron = '<a href="([^"]+)".*?>([^"]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,scrapedtitle in matches: for scrapedurl,scrapedtitle in matches:
+3 -3
View File
@@ -43,11 +43,11 @@ def categorias(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
if item.title == "Canal": if item.title == "Canal":
data = scrapertools.get_match(data,'>Studios</a>(.*?)</ul>') data = scrapertools.find_single_match(data,'>Studios</a>(.*?)</ul>')
if item.title == "Año": if item.title == "Año":
data = scrapertools.get_match(data,'>Years</a>(.*?)</ul>') data = scrapertools.find_single_match(data,'>Years</a>(.*?)</ul>')
if item.title == "Categorias": if item.title == "Categorias":
data = scrapertools.get_match(data,'>XXX Genres</div>(.*?)</ul>') data = scrapertools.find_single_match(data,'>XXX Genres</div>(.*?)</ul>')
patron = '<a href="([^"]+)".*?>([^"]+)</a>(.*?)</li>' patron = '<a href="([^"]+)".*?>([^"]+)</a>(.*?)</li>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,scrapedtitle,cantidad in matches: for scrapedurl,scrapedtitle,cantidad in matches:
+2 -2
View File
@@ -53,7 +53,7 @@ def videos(item):
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
itemlist = [] itemlist = []
data = scrapertools.get_match(data, '<article.+?>(.*?)</article>') data = scrapertools.find_single_match(data, '<article.+?>(.*?)</article>')
# Patron # Patron
patron = '(?s)<div class="thumb-list__item.*?href="([^"]+)".*?src="([^"]+)".*?alt="([^"]+)">.*?' patron = '(?s)<div class="thumb-list__item.*?href="([^"]+)".*?src="([^"]+)".*?alt="([^"]+)">.*?'
@@ -87,7 +87,7 @@ def categorias(item):
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data, '(?s)<div class="all-categories">(.*?)</aside>') data = scrapertools.find_single_match(data, '(?s)<div class="all-categories">(.*?)</aside>')
patron = '(?s)<li>.*?<a href="([^"]+)".*?>([^<]+).*?</a></li>' patron = '(?s)<li>.*?<a href="([^"]+)".*?>([^<]+).*?</a></li>'
matches = re.compile(patron, re.DOTALL).findall(data) matches = re.compile(patron, re.DOTALL).findall(data)
+4 -4
View File
@@ -44,11 +44,11 @@ def categorias(item):
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
if item.title=="Canal": if item.title=="Canal":
data = scrapertools.get_match(data,'<div class="footer-banner">(.*?)<div id="footer-copyright">') data = scrapertools.find_single_match(data,'<div class="footer-banner">(.*?)<div id="footer-copyright">')
if item.title=="Productora" : if item.title=="Productora" :
data = scrapertools.get_match(data,'<li id="menu-item-16"(.*?)</ul>') data = scrapertools.find_single_match(data,'<li id="menu-item-16"(.*?)</ul>')
if item.title=="Categorias" : if item.title=="Categorias" :
data = scrapertools.get_match(data,'<a>Categories</a>(.*?)</ul>') data = scrapertools.find_single_match(data,'<a>Categories</a>(.*?)</ul>')
patron = '<a href="([^"]+)">([^"]+)</a>' patron = '<a href="([^"]+)">([^"]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,scrapedtitle in matches: for scrapedurl,scrapedtitle in matches:
@@ -95,7 +95,7 @@ def play(item):
variable = scrapertools.find_single_match(data,'<script type=\'text/javascript\'> str=\'([^\']+)\'') variable = scrapertools.find_single_match(data,'<script type=\'text/javascript\'> str=\'([^\']+)\'')
resuelta = re.sub("@[A-F0-9][A-F0-9]", lambda m: m.group()[1:].decode('hex'), variable) resuelta = re.sub("@[A-F0-9][A-F0-9]", lambda m: m.group()[1:].decode('hex'), variable)
url = scrapertools.find_single_match(resuelta,'<iframe src="([^"]+)"') url = scrapertools.find_single_match(resuelta,'<iframe src="([^"]+)"')
data = scrapertools.cachePage(url) data = httptools.downloadpage(item.url).data
itemlist = servertools.find_video_items(data=data) itemlist = servertools.find_video_items(data=data)
for videoitem in itemlist: for videoitem in itemlist:
videoitem.title = item.title videoitem.title = item.title
+1 -1
View File
@@ -82,7 +82,7 @@ def findvideos(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
data = scrapertools.get_match(data,'<div class="video-embed">(.*?)</div>') data = scrapertools.find_single_match(data,'<div class="video-embed">(.*?)</div>')
patron = '<noscript>.*?<iframe src="([^"]+)"' patron = '<noscript>.*?<iframe src="([^"]+)"'
matches = scrapertools.find_multiple_matches(data, patron) matches = scrapertools.find_multiple_matches(data, patron)
for url in matches: for url in matches:
+2 -2
View File
@@ -42,9 +42,9 @@ def categorias(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
if item.title == "Canal" : if item.title == "Canal" :
data = scrapertools.get_match(data,'>Studios</a>(.*?)</ul>') data = scrapertools.find_single_match(data,'>Studios</a>(.*?)</ul>')
else: else:
data = scrapertools.get_match(data,'>Categories</a>(.*?)</ul>') data = scrapertools.find_single_match(data,'>Categories</a>(.*?)</ul>')
patron = '<a href="([^"]+)">([^<]+)</a>' patron = '<a href="([^"]+)">([^<]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data) matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,scrapedtitle in matches: for scrapedurl,scrapedtitle in matches:
+6 -32
View File
@@ -38,19 +38,20 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data1 = scrapertools.get_match(data,'<h5>Popular Categories<br />(.*?)</aside>') data1 = scrapertools.find_single_match(data,'<h5>Popular Categories<br />(.*?)</aside>')
if item.title == "Canal" : if item.title == "Canal" :
data1 = scrapertools.get_match(data,'>Top sites</a>(.*?)</ul>') data1 = scrapertools.find_single_match(data,'>Top sites</a>(.*?)</ul>')
data1 += scrapertools.get_match(data,'Downloads</h2>(.*?)</ul>') data1 += scrapertools.find_single_match(data,'Downloads</h2>(.*?)</ul>')
patron = '<a href="([^<]+)">([^<]+)</a>' patron = '<a href="([^<]+)">([^<]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data1) matches = re.compile(patron,re.DOTALL).findall(data1)
for scrapedurl,scrapedtitle in matches: for scrapedurl,scrapedtitle in matches:
scrapedplot = "" scrapedplot = ""
scrapedthumbnail = "" scrapedthumbnail = ""
itemlist.append( Item(channel=item.channel, action="lista", title=scrapedtitle, url=scrapedurl, itemlist.append( Item(channel=item.channel, action="lista", title=scrapedtitle, url=scrapedurl,
thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True) ) thumbnail=scrapedthumbnail , plot=scrapedplot) )
return itemlist return itemlist
def lista(item): def lista(item):
logger.info() logger.info()
itemlist = [] itemlist = []
@@ -67,7 +68,7 @@ def lista(item):
elif '1080' in scrapedtitle : title= "[COLOR red]" + "1080p" + "[/COLOR] " + scrapedtitle elif '1080' in scrapedtitle : title= "[COLOR red]" + "1080p" + "[/COLOR] " + scrapedtitle
else: title = scrapedtitle else: title = scrapedtitle
itemlist.append( Item(channel=item.channel, action="findvideos", title=title, url=scrapedurl, itemlist.append( Item(channel=item.channel, action="findvideos", title=title, url=scrapedurl,
thumbnail=scrapedthumbnail, fanart=scrapedthumbnail, plot=scrapedplot) ) fanart=scrapedthumbnail, thumbnail=scrapedthumbnail,plot=scrapedplot) )
next_page = scrapertools.find_single_match(data,'<a class="next page-numbers" href="([^"]+)">Next &rarr;</a>') next_page = scrapertools.find_single_match(data,'<a class="next page-numbers" href="([^"]+)">Next &rarr;</a>')
if next_page!="": if next_page!="":
next_page = urlparse.urljoin(item.url,next_page) next_page = urlparse.urljoin(item.url,next_page)
@@ -75,30 +76,3 @@ def lista(item):
return itemlist return itemlist
def findvideos(item):
    """Build one playable entry per external link found on the page at item.url.

    The page is downloaded, narrowed to the text captured between
    '--more-->' and the following '/a>' (via scrapertools.get_match), and
    stripped of newlines, carriage returns, tabs, '&nbsp;' and '<br>'.
    Every anchor carrying class="external" in that fragment becomes an Item
    with action "play"; thumbnail and plot are left empty.

    Returns the list of Items, in the order the links appear.
    """
    logger.info()
    page = httptools.downloadpage(item.url).data
    section = scrapertools.get_match(page,'--more-->(.*?)/a>')
    section = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", section)
    link_re = re.compile('<a href="([^"]+)".*?class="external">(.*?)<', re.DOTALL)
    # Each match is a (url, visible link text) pair.
    return [
        Item(channel=item.channel, action="play", title=link_title, fulltitle=item.title,
             url=link_url, thumbnail="", plot="")
        for link_url, link_title in link_re.findall(section)
    ]
def play(item):
    """Return the video items detected on the page at item.url.

    The page body is handed to servertools.find_video_items; every item it
    returns is then stamped with the title, fulltitle, thumbnail and channel
    of the incoming item before the list is returned.
    """
    logger.info()
    found = servertools.find_video_items(data=httptools.downloadpage(item.url).data)
    for video in found:
        # Copy the display metadata from the entry the user selected.
        for field in ("title", "fulltitle", "thumbnail", "channel"):
            setattr(video, field, getattr(item, field))
    return found
+2 -2
View File
@@ -44,7 +44,7 @@ def categorias(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data, '<h4>Trending(.*?)</ul>') data = scrapertools.find_single_match(data, '<h4>Trending(.*?)</ul>')
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
patron = '<li><a href="([^"]+)">([^"]+)</a>' patron = '<li><a href="([^"]+)">([^"]+)</a>'
matches = re.compile(patron, re.DOTALL).findall(data) matches = re.compile(patron, re.DOTALL).findall(data)
@@ -97,7 +97,7 @@ def play(item):
logger.info() logger.info()
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data, 'var encodings(.*?)var') data = scrapertools.find_single_match(data, 'var encodings(.*?)var')
if '360' in data: if '360' in data:
patron = '"360".*?"filename"\:"(.*?)"' patron = '"360".*?"filename"\:"(.*?)"'
if '720' in data: if '720' in data:
+3 -3
View File
@@ -41,7 +41,7 @@ def catalogo(item):
itemlist = [] itemlist = []
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
data1 = scrapertools.get_match(data,'>Most Popular Pornstars<(.*?)<i class=\'icon-menu-right\'></i></a>') data1 = scrapertools.find_single_match(data,'>Most Popular Pornstars<(.*?)<i class=\'icon-menu-right\'></i></a>')
patron = '<a href="([^"]+)".*?' patron = '<a href="([^"]+)".*?'
patron += 'data-original="([^"]+)".*?' patron += 'data-original="([^"]+)".*?'
patron += '<span class="porn-star-name">([^"]+)</span>.*?' patron += '<span class="porn-star-name">([^"]+)</span>.*?'
@@ -66,9 +66,9 @@ def categorias(item):
data = httptools.downloadpage(item.url).data data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data) data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
if item.title == "Canal": if item.title == "Canal":
data = scrapertools.get_match(data,'>All</div>(.*?)<i class=\'icon-menu-right\'></i></a>') data = scrapertools.find_single_match(data,'>All</div>(.*?)<i class=\'icon-menu-right\'></i></a>')
if item.title == "Categorias": if item.title == "Categorias":
data = scrapertools.get_match(data,'<div class=\'row alphabetical\'.*?>(.*?)>Popular by Country</h2>') data = scrapertools.find_single_match(data,'<div class=\'row alphabetical\'.*?>(.*?)>Popular by Country</h2>')
patron = '<a href="([^"]+)".*?' patron = '<a href="([^"]+)".*?'
patron += '<img src=(.*?)>.*?' patron += '<img src=(.*?)>.*?'
patron += '>([^<]+) (?:Videos|videos)<' patron += '>([^<]+) (?:Videos|videos)<'