Corrección de scrapertools.cachePage y scrapertools.get_match

This commit is contained in:
paezner
2019-04-03 17:19:52 +02:00
parent 867a14fe23
commit 550e67da94
33 changed files with 76 additions and 112 deletions
+4 -4
View File
@@ -44,11 +44,11 @@ def categorias(item):
data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
if item.title=="Canal":
data = scrapertools.get_match(data,'<div class="footer-banner">(.*?)<div id="footer-copyright">')
data = scrapertools.find_single_match(data,'<div class="footer-banner">(.*?)<div id="footer-copyright">')
if item.title=="Productora" :
data = scrapertools.get_match(data,'<li id="menu-item-16"(.*?)</ul>')
data = scrapertools.find_single_match(data,'<li id="menu-item-16"(.*?)</ul>')
if item.title=="Categorias" :
data = scrapertools.get_match(data,'<a>Categories</a>(.*?)</ul>')
data = scrapertools.find_single_match(data,'<a>Categories</a>(.*?)</ul>')
patron = '<a href="([^"]+)">([^"]+)</a>'
matches = re.compile(patron,re.DOTALL).findall(data)
for scrapedurl,scrapedtitle in matches:
@@ -95,7 +95,7 @@ def play(item):
variable = scrapertools.find_single_match(data,'<script type=\'text/javascript\'> str=\'([^\']+)\'')
resuelta = re.sub("@[A-F0-9][A-F0-9]", lambda m: m.group()[1:].decode('hex'), variable)
url = scrapertools.find_single_match(resuelta,'<iframe src="([^"]+)"')
data = scrapertools.cachePage(url)
data = httptools.downloadpage(item.url).data
itemlist = servertools.find_video_items(data=data)
for videoitem in itemlist:
videoitem.title = item.title