Fix regex

This commit is contained in:
t1254362
2018-04-27 21:45:38 +02:00
parent bc34fec50f
commit 7de3711614
+100 -100
View File
@@ -1,100 +1,100 @@
# -*- coding: utf-8 -*-
import re
from urlparse import urljoin

from core import httptools
from core import servertools
from core.item import Item
from platformcode import logger

# Base URL of the site; every relative link in this channel is resolved against it.
HOST = "http://yespornplease.com"
def mainlist(item):
    """Build the root menu of the channel.

    Returns a list with three clones of ``item``: the latest videos
    (action ``links``), the category index (action ``categories``) and the
    search entry (action ``search``).
    """
    logger.info()
    itemlist = [
        item.clone(action="links", title="Novedades", url=HOST),
        item.clone(action="categories", title="Categorías", url=urljoin(HOST, "categories")),
        item.clone(action="search", title="Buscar", url=urljoin(HOST, "search")),
    ]
    return itemlist
def search(item, texto):
    """Run a site search for ``texto`` and return the matching video items.

    ``item.url`` is overwritten with the search URL before delegating to
    ``links``. On any failure the error information is logged and an empty
    list is returned.
    """
    logger.info("texto = %s" %(texto))
    item.url = urljoin(HOST, "search&q=" + texto)
    try:
        return links(item)
    # Deliberate catch-all: a failing channel must not interrupt the global search.
    except:
        import sys
        for line in sys.exc_info():
            logger.error("%s" % line)
        return []
def categories(item):
    """List the categories found on the page at ``item.url``.

    Each regex match yields a relative ``/search...`` URL, a category name
    and a numeric counter; one ``links`` item is produced per match with the
    counter appended to the title.
    """
    logger.info()
    data = httptools.downloadpage(item.url).data
    # findall returns plain tuples in group order: (url, name, counter).
    matches = re.findall("href=[\"'](?P<url>/search[^\"']+).*?>(?P<name>[^<>]+)</div>.*?badge[^>]+>(?P<counter>\d+)", data, re.DOTALL | re.MULTILINE)
    result = []
    for url, name, counter in matches:
        result.append(item.clone(action = "links", title = "%s (%s videos)" % (name, counter), url = urljoin(item.url, url)))
    return result
def get_page(url):
    """Return the page number carried by the ``p`` query parameter of ``url``.

    Returns 1 when the URL has no ``p=<digits>`` parameter.
    """
    # Anchor on '?' or '&' (as get_page_url does) so that parameters merely
    # ending in "p", e.g. "top=5", are not mistaken for the page number.
    page = re.search(r"[?&]p=(\d+)", url)
    if page:
        return int(page.group(1))
    return 1
def get_page_url(url, page):
    """Return ``url`` with its ``p`` (page) parameter set to ``page``.

    An existing ``?p=N`` / ``&p=N`` is rewritten in place; otherwise the
    parameter is appended, using ``&`` when the URL already contains ``?``
    and ``?`` when it does not.
    """
    logger.debug("URL: %s to page %d" % (url, page))
    # Use the substitution count rather than comparing strings: when the URL
    # already points at the requested page the text is unchanged, and a string
    # comparison would wrongly append a second "p=" parameter.
    resultURL, replaced = re.subn(r"([&\?]p=)(?:\d+)", r"\g<1>%d" % page, url)
    if not replaced:
        resultURL += ("&" if "?" in url else "?") + "p=%d" % (page)
    logger.debug("Result: %s" % (resultURL))
    return resultURL
def links(item):
    """List the videos found on the page at ``item.url``, plus pagination entries.

    Each regex match provides a thumbnail, a title (taken from a ``title`` or
    ``alt`` attribute), a video id, an optional quality label and a duration.
    One ``play`` item is created per match. When the page contains
    ``class="prevnext"`` markers, previous/next ``links`` items are appended.
    """
    logger.info()
    data = httptools.downloadpage(item.url).data
    reExpr = "<img\s+src=['\"](?P<img>[^'\"]+)[^>]+(?:title|alt)[^'\"]*['\"](?P<title>[^\"]+)[^>]+id[^'\"]*['\"](?P<id>[^'\"]+)[^>]*>(?:[^<]*<[^>]+>(?P<quality>[^<]+)<)?[^<]*<[^>]*duration[^>]*>(?P<duration>[^<]+)"
    reResults = re.findall(reExpr, data, re.MULTILINE | re.DOTALL)
    result = []
    for img, title, vID, quality, duration in reResults:
        logger.info("[link] %(title)s [%(quality)s] [%(duration)s]: %(vid)s (%(img)s" % ({"title": title, "duration": duration, "vid": vID, "img": img, "quality": quality if quality else "--"}))
        # The quality group is optional; findall yields "" when it did not match.
        formattedQuality = ""
        if quality:
            formattedQuality += " [%s]" % (quality)
        titleFormatted = "%(title)s%(quality)s [%(duration)s]" % ({"title": title, "quality": formattedQuality, "duration": duration})
        result.append(item.clone(action = "play", title = titleFormatted, url = urljoin(item.url, "/view/%s" % (vID)), thumbnail = urljoin(item.url, img), vID = vID))
    # Has pagination
    paginationOccurences = data.count('class="prevnext"')
    if paginationOccurences:
        page = get_page(item.url)
        logger.info("Page " + str(page) + " Ocurrences: " + str(paginationOccurences))
        if page > 1:
            result.append(item.clone(action = "links", title = "<< Anterior", url = get_page_url(item.url, page - 1)))
        # A "next" entry is offered on the first page, or whenever more than
        # one prevnext marker is present.
        if paginationOccurences > 1 or page == 1:
            result.append(item.clone(action = "links", title = "Siguiente >>", url = get_page_url(item.url, page + 1)))
    return result
def play(item):
    """Return the playable items for the video identified by ``item.vID``.

    The ``/view/<vID>`` URL is resolved against ``item.url`` and passed, on a
    clone of ``item``, to ``servertools.find_video_items``.
    """
    logger.info(item)
    embededURL = urljoin(item.url, "/view/%s" % (item.vID))
    itemlist = servertools.find_video_items(item.clone(url = embededURL))
    return itemlist