pulizia
This commit is contained in:
@@ -1,112 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# --------------------------------------------------------------------------------
|
||||
# Cloudflare decoder
|
||||
# --------------------------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import time
|
||||
import urllib
|
||||
|
||||
import urlparse
|
||||
|
||||
from platformcode import logger
|
||||
|
||||
|
||||
class Cloudflare:
    """Detect a Cloudflare browser-check page and build the URL that answers it.

    Two detection methods are attempted on the response passed to the
    constructor:

    * Method #1 (javascript): the challenge form and its hidden inputs are
      scraped from the page body and stored in ``js_data``.
    * Method #2 (headers): the ``refresh`` response header is split into a
      wait time, an auth URL and a ``pass`` parameter, stored in
      ``header_data``.

    Both dictionaries stay empty when the corresponding method is not
    available.
    """

    def __init__(self, response):
        """Inspect *response* for a Cloudflare challenge.

        @param response: mapping read through the keys "url", "data" and
            "headers" (page URL, page body and response headers).
        """
        self.timeout = 5
        parsed = urlparse.urlparse(response["url"])
        self.domain = parsed[1]
        self.protocol = parsed[0]
        self.js_data = {}
        self.header_data = {}

        # Nothing to do when the page carries no challenge script, or when the
        # URL is already the challenge-answer endpoint.
        if "var s,t,o,p,b,r,e,a,k,i,n,g,f" not in response["data"] or "chk_jschl" in response["url"]:
            return

        try:
            data = response["data"]
            self.js_data["data"] = data
            self.js_data["auth_url"] = \
                re.compile('<form id="challenge-form" action="([^"]+)" method="get">').findall(data)[0]
            self.js_data["params"] = {}
            self.js_data["params"]["jschl_vc"] = \
                re.compile('<input type="hidden" name="jschl_vc" value="([^"]+)"/>').findall(data)[0]
            self.js_data["params"]["pass"] = \
                re.compile('<input type="hidden" name="pass" value="([^"]+)"/>').findall(data)[0]
            # The page gives the delay in milliseconds; it is stored in seconds.
            self.js_data["wait"] = int(re.compile(r"\}, ([\d]+)\);", re.MULTILINE).findall(data)[0]) / 1000
            self.js_data["params"]["s"] = \
                re.compile('<input type="hidden" name="s" value="([^"]+)"').findall(data)[0]
        except Exception:
            # Any missing piece means the javascript method cannot be used.
            logger.debug("Metodo #1 (javascript): NO disponible")
            self.js_data = {}

        if "refresh" in response["headers"]:
            try:
                refresh = response["headers"]["refresh"]
                self.header_data["wait"] = int(refresh.split(";")[0])
                self.header_data["auth_url"] = refresh.split("=")[1].split("?")[0]
                self.header_data["params"] = {}
                self.header_data["params"]["pass"] = refresh.split("=")[2]
            except Exception:
                logger.debug("Metodo #2 (headers): NO disponible")
                self.header_data = {}

    def solve_cf(self, body, domain):
        """Evaluate the challenge script found in *body* and return its answer.

        The script is extracted with regular expressions, stripped of the
        statements that touch the DOM, embedded in a minimal JavaScript
        environment and evaluated with js2py.

        @param body: HTML of the challenge page.
        @param domain: domain used as the JavaScript variable ``t``.
        @return: result of the script converted to float.
        @raise AttributeError: when the challenge script is not found in *body*.
        """
        js = re.search(
            r"setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n",
            body
        ).group(1)

        js = re.sub(r"a\.value = ((.+).toFixed\(10\))?", r"\1", js)
        js = re.sub(r'(e\s=\sfunction\(s\)\s{.*?};)', '', js, flags=re.DOTALL | re.MULTILINE)
        js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js).replace("t.length", str(len(domain)))
        js = js.replace('; 121', '')
        js = re.sub(r"[\n\\']", "", js)
        # Minimal environment the challenge script expects; braces are doubled
        # because the template goes through str.format().
        jsEnv = """
        var t = "{domain}";
        var g = String.fromCharCode;
        o = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
        e = function(s) {{
            s += "==".slice(2 - (s.length & 3));
            var bm, r = "", r1, r2, i = 0;
            for (; i < s.length;) {{
                bm = o.indexOf(s.charAt(i++)) << 18 | o.indexOf(s.charAt(i++)) << 12 | (r1 = o.indexOf(s.charAt(i++))) << 6 | (r2 = o.indexOf(s.charAt(i++)));
                r += r1 === 64 ? g(bm >> 16 & 255) : r2 === 64 ? g(bm >> 16 & 255, bm >> 8 & 255) : g(bm >> 16 & 255, bm >> 8 & 255, bm & 255);
            }}
            return r;
        }};
        function italics (str) {{ return '<i>' + this + '</i>'; }};
        var document = {{
            getElementById: function () {{
                return {{'innerHTML': '{innerHTML}'}};
            }}
        }};
        {js}
        """
        innerHTML = re.search(r'<div(?: [^<>]*)? id="([^<>]*?)">([^<>]*?)<\/div>', body, re.MULTILINE | re.DOTALL)
        innerHTML = innerHTML.group(2).replace("'", r"\'") if innerHTML else ""
        # base64 was used by atob() below without ever being imported.
        import base64
        import js2py
        from jsc import jsunc
        js = jsunc(jsEnv.format(domain=domain, innerHTML=innerHTML, js=js))

        def atob(s):
            # Python stand-in for the browser atob() exposed to the script.
            return base64.b64decode('{}'.format(s)).decode('utf-8')

        js2py.disable_pyimport()
        context = js2py.EvalJs({'atob': atob})
        result = context.eval(js)
        return float(result)

    @property
    def wait_time(self):
        """Wait time of the javascript method, or of the header method, or 0."""
        if self.js_data.get("wait", 0):
            return self.js_data["wait"]
        else:
            return self.header_data.get("wait", 0)

    @property
    def is_cloudflare(self):
        """True when either detection method found a positive wait time."""
        return self.header_data.get("wait", 0) > 0 or self.js_data.get("wait", 0) > 0

    def get_url(self):
        """Solve the javascript challenge and return the URL that submits it.

        Sleeps for the wait time stored in ``js_data`` before returning.
        Returns None when the javascript method is not available.
        """
        # Method #1 (javascript)
        if self.js_data.get("wait", 0):
            self.js_data["params"]["jschl_answer"] = self.solve_cf(self.js_data["data"], self.domain)
            response = "%s://%s%s?%s" % (
                self.protocol, self.domain, self.js_data["auth_url"], urllib.urlencode(self.js_data["params"]))
            time.sleep(self.js_data["wait"])
            return response
|
||||
+7
-7
@@ -17,7 +17,7 @@ from threading import Lock
|
||||
from core.jsontools import to_utf8
|
||||
from platformcode import config, logger
|
||||
from platformcode.logger import WebErrorException
|
||||
from core import scrapertoolsV2
|
||||
from core import scrapertools
|
||||
|
||||
# Get the addon version
|
||||
__version = config.get_addon_version()
|
||||
@@ -48,7 +48,7 @@ def get_user_agent():
|
||||
|
||||
def get_url_headers(url, forced=False):
|
||||
domain = urlparse.urlparse(url)[1]
|
||||
sub_dom = scrapertoolsV2.find_single_match(domain, r'\.(.*?\.\w+)')
|
||||
sub_dom = scrapertools.find_single_match(domain, r'\.(.*?\.\w+)')
|
||||
if sub_dom and not 'google' in url:
|
||||
domain = sub_dom
|
||||
domain_cookies = cj._cookies.get("." + domain, {}).get("/", {})
|
||||
@@ -159,16 +159,16 @@ def channel_proxy_list(url, forced_proxy=None):
|
||||
|
||||
if not url.endswith('/'):
|
||||
url += '/'
|
||||
if scrapertoolsV2.find_single_match(url, r'(?:http.*\:)?\/\/(?:www\.)?([^\?|\/]+)(?:\?|\/)') \
|
||||
if scrapertools.find_single_match(url, r'(?:http.*\:)?\/\/(?:www\.)?([^\?|\/]+)(?:\?|\/)') \
|
||||
in proxy_channel_bloqued:
|
||||
if forced_proxy and forced_proxy not in ['Total', 'ProxyDirect', 'ProxyCF', 'ProxyWeb']:
|
||||
if forced_proxy in proxy_channel_bloqued[scrapertoolsV2.find_single_match(url, r'(?:http.*\:)?\/\/(?:www\.)?([^\?|\/]+)(?:\?|\/)')]:
|
||||
if forced_proxy in proxy_channel_bloqued[scrapertools.find_single_match(url, r'(?:http.*\:)?\/\/(?:www\.)?([^\?|\/]+)(?:\?|\/)')]:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
if forced_proxy:
|
||||
return True
|
||||
if not 'OFF' in proxy_channel_bloqued[scrapertoolsV2.find_single_match(url, r'(?:http.*\:)?\/\/(?:www\.)?([^\?|\/]+)(?:\?|\/)')]:
|
||||
if not 'OFF' in proxy_channel_bloqued[scrapertools.find_single_match(url, r'(?:http.*\:)?\/\/(?:www\.)?([^\?|\/]+)(?:\?|\/)')]:
|
||||
return True
|
||||
|
||||
return False
|
||||
@@ -248,7 +248,7 @@ def check_proxy(url, **opt):
|
||||
proxy_data['log'] = proxytools.get_proxy_addr(url, post=opt.get('post', None), forced_proxy=forced_proxy)
|
||||
|
||||
if proxy_addr_forced and proxy_data['log']:
|
||||
proxy_data['log'] = scrapertoolsV2.find_single_match(str(proxy_addr_forced), r"{'http.*':\s*'(.*?)'}")
|
||||
proxy_data['log'] = scrapertools.find_single_match(str(proxy_addr_forced), r"{'http.*':\s*'(.*?)'}")
|
||||
|
||||
if proxy and proxy_data['addr']:
|
||||
if proxy_addr_forced: proxy_data['addr'] = proxy_addr_forced
|
||||
@@ -564,7 +564,7 @@ def downloadpage(url, **opt):
|
||||
save_cookies(alfa_s=opt.get('alfa_s', False))
|
||||
|
||||
# is_channel = inspect.getmodule(inspect.currentframe().f_back)
|
||||
# is_channel = scrapertoolsV2.find_single_match(str(is_channel), "<module '(channels).*?'")
|
||||
# is_channel = scrapertools.find_single_match(str(is_channel), "<module '(channels).*?'")
|
||||
# if is_channel and isinstance(response_code, int):
|
||||
# if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
|
||||
# if response_code > 399:
|
||||
|
||||
+76
-82
@@ -1,27 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# --------------------------------------------------------------------------------
|
||||
# Scraper tools for reading and processing web elements
|
||||
# Scraper tools v2 for reading and processing web elements
|
||||
# --------------------------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from core import httptools
|
||||
import urlparse
|
||||
|
||||
from core.entities import html5
|
||||
from platformcode import logger
|
||||
|
||||
|
||||
def get_header_from_response(url, header_to_get="", post=None, headers=None):
|
||||
header_to_get = header_to_get.lower()
|
||||
response = httptools.downloadpage(url, post=post, headers=headers, only_headers=True)
|
||||
return response.headers.get(header_to_get)
|
||||
|
||||
|
||||
def read_body_and_headers(url, post=None, headers=None, follow_redirects=False, timeout=None):
|
||||
response = httptools.downloadpage(url, post=post, headers=headers, follow_redirects=follow_redirects,
|
||||
timeout=timeout)
|
||||
return response.data, response.headers
|
||||
|
||||
|
||||
def printMatches(matches):
|
||||
i = 0
|
||||
for match in matches:
|
||||
@@ -42,8 +32,37 @@ def find_multiple_matches(text, pattern):
|
||||
return re.findall(pattern, text, re.DOTALL)
|
||||
|
||||
|
||||
def entityunescape(cadena):
|
||||
return unescape(cadena)
|
||||
def find_multiple_matches_groups(text, pattern):
|
||||
r = re.compile(pattern)
|
||||
return [m.groupdict() for m in r.finditer(text)]
|
||||
|
||||
|
||||
# Convierte los codigos html "ñ" y lo reemplaza por "ñ" caracter unicode utf-8
|
||||
def decodeHtmlentities(data):
|
||||
entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8})(;?)")
|
||||
|
||||
def substitute_entity(match):
|
||||
ent = match.group(2) + match.group(3)
|
||||
res = ""
|
||||
while not ent in html5 and not ent.endswith(";") and match.group(1) != "#":
|
||||
# Excepción para cuando '&' se usa como argumento en la urls contenidas en los datos
|
||||
try:
|
||||
res = ent[-1] + res
|
||||
ent = ent[:-1]
|
||||
except:
|
||||
break
|
||||
|
||||
if match.group(1) == "#":
|
||||
ent = unichr(int(ent.replace(";", "")))
|
||||
return ent.encode('utf-8')
|
||||
else:
|
||||
cp = html5.get(ent)
|
||||
if cp:
|
||||
return cp.decode("unicode-escape").encode('utf-8') + res
|
||||
else:
|
||||
return match.group()
|
||||
|
||||
return entity_re.subn(substitute_entity, data)[0]
|
||||
|
||||
|
||||
def unescape(text):
|
||||
@@ -84,47 +103,6 @@ def unescape(text):
|
||||
# Convierte los codigos html "ñ" y lo reemplaza por "ñ" caracter unicode utf-8
|
||||
|
||||
|
||||
def decodeHtmlentities(string):
|
||||
string = entitiesfix(string)
|
||||
entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});")
|
||||
|
||||
def substitute_entity(match):
|
||||
from htmlentitydefs import name2codepoint as n2cp
|
||||
ent = match.group(2)
|
||||
if match.group(1) == "#":
|
||||
return unichr(int(ent)).encode('utf-8')
|
||||
else:
|
||||
cp = n2cp.get(ent)
|
||||
|
||||
if cp:
|
||||
return unichr(cp).encode('utf-8')
|
||||
else:
|
||||
return match.group()
|
||||
|
||||
return entity_re.subn(substitute_entity, string)[0]
|
||||
|
||||
|
||||
def entitiesfix(string):
|
||||
# Las entidades comienzan siempre con el símbolo & , y terminan con un punto y coma ( ; ).
|
||||
string = string.replace("á", "á")
|
||||
string = string.replace("é", "é")
|
||||
string = string.replace("í", "í")
|
||||
string = string.replace("ó", "ó")
|
||||
string = string.replace("ú", "ú")
|
||||
string = string.replace("Á", "Á")
|
||||
string = string.replace("É", "É")
|
||||
string = string.replace("Í", "Í")
|
||||
string = string.replace("Ó", "Ó")
|
||||
string = string.replace("Ú", "Ú")
|
||||
string = string.replace("ü", "ü")
|
||||
string = string.replace("Ü", "Ü")
|
||||
string = string.replace("ñ", "ñ")
|
||||
string = string.replace("¿", "¿")
|
||||
string = string.replace("¡", "¡")
|
||||
string = string.replace(";;", ";")
|
||||
return string
|
||||
|
||||
|
||||
def htmlclean(cadena):
|
||||
cadena = re.compile("<!--.*?-->", re.DOTALL).sub("", cadena)
|
||||
|
||||
@@ -226,7 +204,7 @@ def htmlclean(cadena):
|
||||
cadena = re.compile("<link[^>]*>", re.DOTALL).sub("", cadena)
|
||||
|
||||
cadena = cadena.replace("\t", "")
|
||||
cadena = entityunescape(cadena)
|
||||
# cadena = entityunescape(cadena)
|
||||
return cadena
|
||||
|
||||
|
||||
@@ -314,8 +292,8 @@ def remove_show_from_title(title, show):
|
||||
return title
|
||||
|
||||
|
||||
# scrapertools.get_filename_from_url(media_url)[-4:]
|
||||
def get_filename_from_url(url):
|
||||
import urlparse
|
||||
parsed_url = urlparse.urlparse(url)
|
||||
try:
|
||||
filename = parsed_url.path
|
||||
@@ -332,19 +310,18 @@ def get_filename_from_url(url):
|
||||
return filename
|
||||
|
||||
|
||||
# def get_domain_from_url(url):
|
||||
# import urlparse
|
||||
# parsed_url = urlparse.urlparse(url)
|
||||
# try:
|
||||
# filename = parsed_url.netloc
|
||||
# except:
|
||||
# # Si falla es porque la implementación de parsed_url no reconoce los atributos como "path"
|
||||
# if len(parsed_url) >= 4:
|
||||
# filename = parsed_url[1]
|
||||
# else:
|
||||
# filename = ""
|
||||
#
|
||||
# return filename
|
||||
def get_domain_from_url(url):
|
||||
parsed_url = urlparse.urlparse(url)
|
||||
try:
|
||||
filename = parsed_url.netloc
|
||||
except:
|
||||
# Si falla es porque la implementación de parsed_url no reconoce los atributos como "path"
|
||||
if len(parsed_url) >= 4:
|
||||
filename = parsed_url[1]
|
||||
else:
|
||||
filename = ""
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
def get_season_and_episode(title):
|
||||
@@ -365,22 +342,15 @@ def get_season_and_episode(title):
|
||||
@return: Numero de temporada y episodio en formato "1x01" o cadena vacia si no se han encontrado
|
||||
"""
|
||||
filename = ""
|
||||
# 4l3x87 - fix for series example 9-1-1
|
||||
# original_title = title
|
||||
# title = title.replace('9-1-1','')
|
||||
|
||||
patrons = ["(\d+)\s*[x-]\s*(\d+)", "(\d+)\s*×\s*(\d+)", "(?:s|t)(\d+)e(\d+)",
|
||||
"(?:season|temp|stagione\w*)\s*(\d+)\s*(?:capitulo|epi|episode|episodio\w*)\s*(\d+)"]
|
||||
patrons = ["(\d+)x(\d+)", "(?:s|t)(\d+)e(\d+)",
|
||||
"(?:season|temp\w*)\s*(\d+)\s*(?:capitulo|epi\w*)\s*(\d+)"]
|
||||
|
||||
for patron in patrons:
|
||||
try:
|
||||
matches = re.compile(patron, re.I).search(title)
|
||||
|
||||
if matches:
|
||||
if len(matches.group(1)) == 1:
|
||||
filename = matches.group(1) + "x" + matches.group(2).zfill(2)
|
||||
else:
|
||||
filename = matches.group(1).lstrip('0') + "x" + matches.group(2).zfill(2)
|
||||
filename = matches.group(1) + "x" + matches.group(2).zfill(2)
|
||||
break
|
||||
except:
|
||||
pass
|
||||
@@ -388,3 +358,27 @@ def get_season_and_episode(title):
|
||||
logger.info("'" + title + "' -> '" + filename + "'")
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
def get_sha1(cadena):
|
||||
try:
|
||||
import hashlib
|
||||
devuelve = hashlib.sha1(cadena).hexdigest()
|
||||
except:
|
||||
import sha
|
||||
import binascii
|
||||
devuelve = binascii.hexlify(sha.new(cadena).digest())
|
||||
|
||||
return devuelve
|
||||
|
||||
|
||||
def get_md5(cadena):
|
||||
try:
|
||||
import hashlib
|
||||
devuelve = hashlib.md5(cadena).hexdigest()
|
||||
except:
|
||||
import md5
|
||||
import binascii
|
||||
devuelve = binascii.hexlify(md5.new(cadena).digest())
|
||||
|
||||
return devuelve
|
||||
|
||||
@@ -1,346 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# --------------------------------------------------------------------------------
|
||||
# Scraper tools v2 for reading and processing web elements
|
||||
# --------------------------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
import urlparse
|
||||
|
||||
from core.entities import html5
|
||||
from platformcode import logger
|
||||
|
||||
|
||||
def printMatches(matches):
    """Log every element of *matches* preceded by its zero-based position."""
    for position, found in enumerate(matches):
        logger.info("%d %s" % (position, found))
|
||||
|
||||
|
||||
def find_single_match(data, patron, index=0):
    """Return the match number *index* of *patron* in *data*, or "" on failure.

    The pattern is applied with re.DOTALL. An empty string is returned when
    there are not enough matches or when the search itself fails (for example
    because *data* is not a string).
    """
    try:
        return re.findall(patron, data, flags=re.DOTALL)[index]
    except Exception:
        # Best effort by design, but no longer swallows KeyboardInterrupt/SystemExit.
        return ""
|
||||
|
||||
|
||||
# Parses a string and extracts all matches of a regular expression
|
||||
def find_multiple_matches(text, pattern):
    """Return every match of *pattern* in *text*, with "." also matching newlines."""
    found = re.findall(pattern, text, flags=re.DOTALL)
    return found
|
||||
|
||||
|
||||
def find_multiple_matches_groups(text, pattern):
    """Return one dict of named groups for each match of *pattern* in *text*.

    The pattern is compiled without extra flags.
    """
    groups = []
    for hit in re.compile(pattern).finditer(text):
        groups.append(hit.groupdict())
    return groups
|
||||
|
||||
|
||||
# Converts HTML entities such as "&ntilde;" into the matching UTF-8 encoded character ("ñ")
|
||||
def decodeHtmlentities(data):
    """Replace the HTML entities found in *data* with UTF-8 encoded characters.

    Numeric entities ("&#NNN;") are converted with unichr(). Named entities
    are looked up in the ``html5`` table; its values are decoded with the
    "unicode-escape" codec (presumably escaped code points - confirm against
    core.entities). Entities that cannot be resolved are left untouched.
    """
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8})(;?)")

    def substitute_entity(match):
        is_numeric = match.group(1) == "#"
        ent = match.group(2) + match.group(3)
        res = ""
        # A bare '&' may simply separate URL arguments inside the data: when
        # the name has no ';' and is unknown, drop trailing characters one by
        # one until a known entity is left, keeping the removed tail in `res`.
        # The explicit `ent` check replaces the former bare except that caught
        # the IndexError raised once the name was exhausted.
        while ent and ent not in html5 and not ent.endswith(";") and not is_numeric:
            res = ent[-1] + res
            ent = ent[:-1]

        if is_numeric:
            return unichr(int(ent.replace(";", ""))).encode('utf-8')

        cp = html5.get(ent)
        if cp:
            return cp.decode("unicode-escape").encode('utf-8') + res
        return match.group()

    return entity_re.subn(substitute_entity, data)[0]
|
||||
|
||||
|
||||
def htmlclean(cadena):
    """Return *cadena* with a fixed set of HTML tags, comments and tabs removed.

    The steps run in a fixed order. Line breaks (<br> variants) become a
    space; everything else is replaced by the empty string. Opening tags are
    matched by prefix ("<i[^>]*>", "<b[^>]*>", "<a[^>]*>", "<p[^>]*>"), so
    they also remove any other tag whose name starts with the same letters.
    """
    def strip_pattern(pattern, text, replacement=""):
        return re.compile(pattern, re.DOTALL).sub(replacement, text)

    def strip_tag(tag, text):
        # Opening tag with any attributes, then the literal closing tag.
        text = strip_pattern("<%s[^>]*>" % tag, text)
        return text.replace("</%s>" % tag, "")

    cadena = strip_pattern("<!--.*?-->", cadena)

    # "<tbody>" was missing: only its closing tag was removed, next to a
    # "<turl>" entry that is not an HTML tag. "<turl>" is kept so existing
    # behaviour is preserved.
    for literal in ("<center>", "</center>", "<cite>", "</cite>", "<em>", "</em>",
                    "<u>", "</u>", "<li>", "</li>", "<turl>", "<tbody>", "</tbody>",
                    "<tr>", "</tr>", "<![CDATA[", "<wbr>"):
        cadena = cadena.replace(literal, "")

    for line_break in ("<Br />", "<BR />", "<Br>"):
        cadena = cadena.replace(line_break, " ")
    cadena = strip_pattern("<br[^>]*>", cadena, " ")

    cadena = strip_pattern("<script.*?</script>", cadena)

    for tag in ("option", "button"):
        cadena = strip_tag(tag, cadena)

    # "<i[^>]*>" has already consumed the opening <iframe ...> tags.
    cadena = strip_pattern("<i[^>]*>", cadena)
    cadena = cadena.replace("</iframe>", "")
    cadena = cadena.replace("</i>", "")

    for tag in ("table", "td", "div", "dd", "b", "font", "strong", "small",
                "span", "a", "p", "ul", "h1", "h2", "h3", "h4"):
        cadena = strip_tag(tag, cadena)

    cadena = strip_pattern("<!--[^-]+-->", cadena)
    cadena = strip_pattern("<img[^>]*>", cadena)

    for tag in ("object", "param", "embed", "title"):
        cadena = strip_tag(tag, cadena)

    cadena = strip_pattern("<link[^>]*>", cadena)

    cadena = cadena.replace("\t", "")
    return cadena
|
||||
|
||||
|
||||
def slugify(title):
    """Turn *title* into a lowercase slug made of a-z, 0-9 and single hyphens.

    Accented vowels, "ñ" and "ç" are replaced by their plain letters, "/"
    becomes "-", every other character outside the valid set is dropped and
    runs of whitespace become one hyphen. When nothing is left the slug is
    "-" followed by the current timestamp.
    """
    # Accents and similar characters are replaced before filtering.
    replacements = (
        ("Á", "a"), ("É", "e"), ("Í", "i"), ("Ó", "o"), ("Ú", "u"),
        ("á", "a"), ("é", "e"), ("í", "i"), ("ó", "o"), ("ú", "u"),
        ("À", "a"), ("È", "e"), ("Ì", "i"), ("Ò", "o"), ("Ù", "u"),
        ("à", "a"), ("è", "e"), ("ì", "i"), ("ò", "o"), ("ù", "u"),
        ("ç", "c"), ("Ç", "C"), ("Ñ", "n"), ("ñ", "n"),
        ("/", "-"),
        # Was replace("&", "&"), a no-op: the entity text had been lost, so
        # "&amp;" leaked the letters "amp" into the slug.
        ("&amp;", "&"),
    )
    for original, plain in replacements:
        title = title.replace(original, plain)

    # Lowercase
    title = title.lower().strip()

    # Drop invalid characters
    validchars = "abcdefghijklmnopqrstuvwxyz1234567890- "
    title = ''.join(c for c in title if c in validchars)

    # Collapse repeated whitespace
    title = re.compile(r"\s+", re.DOTALL).sub(" ", title)

    # Whitespace becomes a hyphen
    title = re.compile(r"\s", re.DOTALL).sub("-", title.strip())

    # Collapse repeated hyphens
    title = re.compile(r"\-+", re.DOTALL).sub("-", title)

    # Special cases: no leading hyphen, never an empty slug
    if title.startswith("-"):
        title = title[1:]

    if title == "":
        title = "-" + str(time.time())

    return title
|
||||
|
||||
|
||||
def remove_htmltags(string):
    """Return *string* without anything that looks like "<...>"."""
    tag_pattern = re.compile('<[^<]+?>')
    return tag_pattern.sub('', string)
|
||||
|
||||
|
||||
def remove_show_from_title(title, show):
    """Remove the show name from the beginning of an episode title.

    The title is returned unchanged unless its slug starts with the slug of
    *show*. Otherwise as many characters as *show* has are cut from the
    front, a leading "-" is dropped, and the result is returned encoded as
    UTF-8. When nothing is left the current timestamp is used.
    """
    if not slugify(title).startswith(slugify(show)):
        return title

    # Work on unicode objects, otherwise lengths are counted in bytes.
    decoded_title = unicode(title, "utf-8", "replace")
    decoded_show = unicode(show, "utf-8", "replace")
    remainder = decoded_title[len(decoded_show):].strip()

    if remainder.startswith("-"):
        remainder = remainder[1:].strip()

    if remainder == "":
        remainder = str(time.time())

    # Back to UTF-8
    return remainder.encode("utf-8", "ignore")
|
||||
|
||||
|
||||
# scrapertools.get_filename_from_url(media_url)[-4:]
|
||||
def get_filename_from_url(url):
    """Return the last segment of the path of *url* ("" when there is none)."""
    parsed_url = urlparse.urlparse(url)
    try:
        filename = parsed_url.path
    except AttributeError:
        # Some urlparse implementations return a plain tuple without named
        # attributes such as "path"; fall back to positional access.
        filename = parsed_url[2] if len(parsed_url) >= 4 else ""

    return filename.rsplit("/", 1)[-1]
|
||||
|
||||
|
||||
def get_domain_from_url(url):
    """Return the network location (host and optional port) of *url*."""
    parsed_url = urlparse.urlparse(url)
    try:
        domain = parsed_url.netloc
    except AttributeError:
        # Some urlparse implementations return a plain tuple without named
        # attributes such as "netloc"; fall back to positional access.
        domain = parsed_url[1] if len(parsed_url) >= 4 else ""

    return domain
|
||||
|
||||
|
||||
def get_season_and_episode(title):
    """
    Return the season and episode number, formatted as "1x01", found in an episode title.
    Examples of values for title and the value returned:
        "serie 101x1.strm", "s101e1.avi", "t101e1.avi"  -> '101x01'
        "Name TvShow 1x6.avi" -> '1x06'
        "Temp 3 episodio 2.avi" -> '3x02'
        "Alcantara season 13 episodie 12.avi" -> '13x12'
        "Temp1 capitulo 14" -> '1x14'
        "Temporada 1: El origen Episodio 9" -> '' (no other text is allowed between the season number and the episode)
        "Episodio 25: titulo episodio" -> '' (there is no season number)
        "Serie X Temporada 1" -> '' (there is no episode number)
    @type title: str
    @param title: title of an episode of a series
    @rtype: str
    @return: season and episode number formatted as "1x01", or an empty string when none is found
    """
    filename = ""

    # Tried in order, case-insensitively; the first pattern that matches wins.
    patrons = [r"(\d+)x(\d+)", r"(?:s|t)(\d+)e(\d+)",
               r"(?:season|temp\w*)\s*(\d+)\s*(?:capitulo|epi\w*)\s*(\d+)"]

    for patron in patrons:
        try:
            matches = re.compile(patron, re.I).search(title)
        except Exception:
            # A title that cannot be searched (e.g. not a string) yields no match.
            continue

        if matches:
            filename = matches.group(1) + "x" + matches.group(2).zfill(2)
            break

    logger.info("'" + title + "' -> '" + filename + "'")

    return filename
|
||||
|
||||
|
||||
def get_sha1(cadena):
    """Return the hexadecimal SHA-1 digest of *cadena*.

    Falls back to the legacy ``sha`` module only when ``hashlib`` cannot be
    imported. Errors raised while hashing are no longer hidden behind that
    fallback.
    """
    try:
        import hashlib
    except ImportError:
        import sha
        import binascii
        return binascii.hexlify(sha.new(cadena).digest())

    return hashlib.sha1(cadena).hexdigest()
|
||||
|
||||
|
||||
def get_md5(cadena):
    """Return the hexadecimal MD5 digest of *cadena*.

    Falls back to the legacy ``md5`` module only when ``hashlib`` cannot be
    imported. Errors raised while hashing are no longer hidden behind that
    fallback.
    """
    try:
        import hashlib
    except ImportError:
        import md5
        import binascii
        return binascii.hexlify(md5.new(cadena).digest())

    return hashlib.md5(cadena).hexdigest()
|
||||
+2
-2
@@ -506,8 +506,8 @@ def get_server_json(server_name):
|
||||
|
||||
|
||||
def get_server_host(server_name):
|
||||
from core import scrapertoolsV2
|
||||
return [scrapertoolsV2.get_domain_from_url(pattern['url']) for pattern in get_server_json(server_name)['find_videos']['patterns']]
|
||||
from core import scrapertools
|
||||
return [scrapertools.get_domain_from_url(pattern['url']) for pattern in get_server_json(server_name)['find_videos']['patterns']]
|
||||
|
||||
|
||||
def get_server_controls_settings(server_name):
|
||||
|
||||
+31
-31
@@ -10,7 +10,7 @@ import urlparse
|
||||
import xbmcaddon
|
||||
|
||||
from channelselector import thumb
|
||||
from core import httptools, scrapertoolsV2, servertools, tmdb, channeltools
|
||||
from core import httptools, scrapertools, servertools, tmdb, channeltools
|
||||
from core.item import Item
|
||||
from lib import unshortenit
|
||||
from platformcode import logger, config
|
||||
@@ -21,7 +21,7 @@ def hdpass_get_servers(item):
|
||||
itemlist = []
|
||||
data = httptools.downloadpage(item.url).data.replace('\n', '')
|
||||
patron = r'<iframe(?: id="[^"]+")? width="[^"]+" height="[^"]+" src="([^"]+)"[^>]+><\/iframe>'
|
||||
url = scrapertoolsV2.find_single_match(data, patron).replace("?alta", "")
|
||||
url = scrapertools.find_single_match(data, patron).replace("?alta", "")
|
||||
url = url.replace("&download=1", "")
|
||||
if 'https' not in url:
|
||||
url = 'https:' + url
|
||||
@@ -37,20 +37,20 @@ def hdpass_get_servers(item):
|
||||
patron_mir = '<div class="row mobileMirrs">(.*?)</div>'
|
||||
patron_media = r'<input type="hidden" name="urlEmbed" data-mirror="([^"]+)" id="urlEmbed"\s*value="([^"]+)"\s*/>'
|
||||
|
||||
res = scrapertoolsV2.find_single_match(data, patron_res)
|
||||
res = scrapertools.find_single_match(data, patron_res)
|
||||
|
||||
itemlist = []
|
||||
|
||||
for res_url, res_video in scrapertoolsV2.find_multiple_matches(res, '<option.*?value="([^"]+?)">([^<]+?)</option>'):
|
||||
for res_url, res_video in scrapertools.find_multiple_matches(res, '<option.*?value="([^"]+?)">([^<]+?)</option>'):
|
||||
|
||||
data = httptools.downloadpage(urlparse.urljoin(url, res_url)).data.replace('\n', '')
|
||||
|
||||
mir = scrapertoolsV2.find_single_match(data, patron_mir)
|
||||
mir = scrapertools.find_single_match(data, patron_mir)
|
||||
|
||||
for mir_url, srv in scrapertoolsV2.find_multiple_matches(mir, '<option.*?value="([^"]+?)">([^<]+?)</value>'):
|
||||
for mir_url, srv in scrapertools.find_multiple_matches(mir, '<option.*?value="([^"]+?)">([^<]+?)</value>'):
|
||||
|
||||
data = httptools.downloadpage(urlparse.urljoin(url, mir_url)).data.replace('\n', '')
|
||||
for media_label, media_url in scrapertoolsV2.find_multiple_matches(data, patron_media):
|
||||
for media_label, media_url in scrapertools.find_multiple_matches(data, patron_media):
|
||||
itemlist.append(Item(channel=item.channel,
|
||||
action="play",
|
||||
fulltitle=item.fulltitle,
|
||||
@@ -168,13 +168,13 @@ def scrapeLang(scraped, lang, longtitle):
|
||||
return language, longtitle
|
||||
|
||||
def cleantitle(title):
|
||||
cleantitle = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(title).replace('"', "'").replace('×', 'x').replace('–', '-')).strip()
|
||||
cleantitle = scrapertools.htmlclean(scrapertools.decodeHtmlentities(title).replace('"', "'").replace('×', 'x').replace('–', '-')).strip()
|
||||
return cleantitle
|
||||
|
||||
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang):
|
||||
itemlist = []
|
||||
log("scrapeBlock qui", block, patron)
|
||||
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
|
||||
matches = scrapertools.find_multiple_matches_groups(block, patron)
|
||||
log('MATCHES =', matches)
|
||||
|
||||
if debug:
|
||||
@@ -214,7 +214,7 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
||||
for kk in known_keys:
|
||||
val = match[listGroups.index(kk)] if kk in listGroups else ''
|
||||
if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
|
||||
val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
|
||||
val = scrapertools.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
|
||||
scraped[kk] = val
|
||||
|
||||
if scraped['season']:
|
||||
@@ -227,7 +227,7 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
||||
episode = ''
|
||||
else:
|
||||
episode = re.sub(r'\s-\s|-|x|–|×|×', 'x', scraped['episode']) if scraped['episode'] else ''
|
||||
second_episode = scrapertoolsV2.find_single_match(episode,'x\d+x(\d+)')
|
||||
second_episode = scrapertools.find_single_match(episode, 'x\d+x(\d+)')
|
||||
if second_episode: episode = re.sub(r'(\d+x\d+)x\d+',r'\1-', episode) + second_episode.zfill(2)
|
||||
|
||||
#episode = re.sub(r'\s-\s|-|x|–|×', 'x', scraped['episode']) if scraped['episode'] else ''
|
||||
@@ -257,18 +257,18 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
||||
if scraped["plot"]:
|
||||
infolabels['plot'] = plot
|
||||
if scraped['duration']:
|
||||
matches = scrapertoolsV2.find_multiple_matches(scraped['duration'],
|
||||
matches = scrapertools.find_multiple_matches(scraped['duration'],
|
||||
r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
|
||||
for h, m in matches:
|
||||
scraped['duration'] = int(h) * 60 + int(m)
|
||||
if not matches:
|
||||
scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
|
||||
scraped['duration'] = scrapertools.find_single_match(scraped['duration'], r'(\d+)')
|
||||
infolabels['duration'] = int(scraped['duration']) * 60
|
||||
if scraped['genere']:
|
||||
genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
|
||||
genres = scrapertools.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
|
||||
infolabels['genere'] = ", ".join(genres)
|
||||
if scraped["rating"]:
|
||||
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
|
||||
infolabels['rating'] = scrapertools.decodeHtmlentities(scraped["rating"])
|
||||
|
||||
AC = CT = ''
|
||||
if typeContentDict:
|
||||
@@ -379,11 +379,11 @@ def scrape(func):
|
||||
if not data:
|
||||
page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, session=item.session)
|
||||
# if url may be changed and channel has findhost to update
|
||||
if (not page.data or scrapertoolsV2.get_domain_from_url(page.url) != scrapertoolsV2.get_domain_from_url(item.url)) and 'findhost' in func.__globals__:
|
||||
if (not page.data or scrapertools.get_domain_from_url(page.url) != scrapertools.get_domain_from_url(item.url)) and 'findhost' in func.__globals__:
|
||||
host = func.__globals__['findhost']()
|
||||
from core import jsontools
|
||||
jsontools.update_node(host, func.__module__.split('.')[-1], 'url')
|
||||
item.url = item.url.replace(scrapertoolsV2.get_domain_from_url(item.url), scrapertoolsV2.get_domain_from_url(host))
|
||||
item.url = item.url.replace(scrapertools.get_domain_from_url(item.url), scrapertools.get_domain_from_url(host))
|
||||
page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True,
|
||||
session=item.session)
|
||||
data = page.data.replace("'", '"')
|
||||
@@ -394,7 +394,7 @@ def scrape(func):
|
||||
if patronBlock:
|
||||
if debugBlock:
|
||||
regexDbg(item, patronBlock, headers, data)
|
||||
blocks = scrapertoolsV2.find_multiple_matches_groups(data, patronBlock)
|
||||
blocks = scrapertools.find_multiple_matches_groups(data, patronBlock)
|
||||
block = ""
|
||||
for bl in blocks:
|
||||
# log(len(blocks),bl)
|
||||
@@ -443,7 +443,7 @@ def scrape(func):
|
||||
if anime:
|
||||
if function == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
|
||||
else: autorenumber.renumber(itemlist)
|
||||
if anime and autorenumber.check(item) == False and not scrapertoolsV2.find_single_match(itemlist[0].title, r'(\d+.\d+)'):
|
||||
if anime and autorenumber.check(item) == False and not scrapertools.find_single_match(itemlist[0].title, r'(\d+.\d+)'):
|
||||
pass
|
||||
else:
|
||||
if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
|
||||
@@ -471,7 +471,7 @@ def dooplay_get_links(item, host):
|
||||
|
||||
data = httptools.downloadpage(item.url).data.replace("'", '"')
|
||||
patron = r'<li id="player-option-[0-9]".*?data-type="([^"]+)" data-post="([^"]+)" data-nume="([^"]+)".*?<span class="title".*?>([^<>]+)</span>(?:<span class="server">([^<>]+))?'
|
||||
matches = scrapertoolsV2.find_multiple_matches(data, patron)
|
||||
matches = scrapertools.find_multiple_matches(data, patron)
|
||||
|
||||
ret = []
|
||||
|
||||
@@ -483,7 +483,7 @@ def dooplay_get_links(item, host):
|
||||
"type": type
|
||||
})
|
||||
dataAdmin = httptools.downloadpage(host + '/wp-admin/admin-ajax.php', post=postData,headers={'Referer': item.url}).data
|
||||
link = scrapertoolsV2.find_single_match(dataAdmin, "<iframe.*src='([^']+)'")
|
||||
link = scrapertools.find_single_match(dataAdmin, "<iframe.*src='([^']+)'")
|
||||
ret.append({
|
||||
'url': link,
|
||||
'title': title,
|
||||
@@ -560,25 +560,25 @@ def swzz_get_url(item):
|
||||
if "/link/" in item.url:
|
||||
data = httptools.downloadpage(item.url, headers=headers).data
|
||||
if "link =" in data:
|
||||
data = scrapertoolsV2.find_single_match(data, 'link = "([^"]+)"')
|
||||
data = scrapertools.find_single_match(data, 'link = "([^"]+)"')
|
||||
if 'http' not in data:
|
||||
data = 'https:' + data
|
||||
else:
|
||||
match = scrapertoolsV2.find_single_match(data, r'<meta name="og:url" content="([^"]+)"')
|
||||
match = scrapertoolsV2.find_single_match(data, r'URL=([^"]+)">') if not match else match
|
||||
match = scrapertools.find_single_match(data, r'<meta name="og:url" content="([^"]+)"')
|
||||
match = scrapertools.find_single_match(data, r'URL=([^"]+)">') if not match else match
|
||||
|
||||
if not match:
|
||||
from lib import jsunpack
|
||||
|
||||
try:
|
||||
data = scrapertoolsV2.find_single_match(data.replace('\n', ''), r"(eval\s?\(function\(p,a,c,k,e,d.*?)</script>")
|
||||
data = scrapertools.find_single_match(data.replace('\n', ''), r"(eval\s?\(function\(p,a,c,k,e,d.*?)</script>")
|
||||
data = jsunpack.unpack(data)
|
||||
|
||||
logger.debug("##### play /link/ unpack ##\n%s\n##" % data)
|
||||
except:
|
||||
logger.debug("##### The content is yet unpacked ##\n%s\n##" % data)
|
||||
|
||||
data = scrapertoolsV2.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";')
|
||||
data = scrapertools.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";')
|
||||
data, c = unshortenit.unwrap_30x_only(data)
|
||||
else:
|
||||
data = match
|
||||
@@ -753,7 +753,7 @@ def typo(string, typography=''):
|
||||
if 'submenu' in string:
|
||||
string = u"\u2022\u2022 ".encode('utf-8') + re.sub(r'\ssubmenu','',string)
|
||||
if 'color' in string:
|
||||
color = scrapertoolsV2.find_single_match(string,'color ([a-z]+)')
|
||||
color = scrapertools.find_single_match(string, 'color ([a-z]+)')
|
||||
if color == 'kod' or '': color = kod_color
|
||||
string = '[COLOR '+ color +']' + re.sub(r'\scolor\s([a-z]+)','',string) + '[/COLOR]'
|
||||
if 'bold' in string:
|
||||
@@ -785,13 +785,13 @@ def match(item, patron='', patronBlock='', headers='', url='', post=''):
|
||||
log('DATA= ', data)
|
||||
|
||||
if patronBlock:
|
||||
block = scrapertoolsV2.find_single_match(data, patronBlock)
|
||||
block = scrapertools.find_single_match(data, patronBlock)
|
||||
log('BLOCK= ',block)
|
||||
else:
|
||||
block = data
|
||||
|
||||
if patron:
|
||||
matches = scrapertoolsV2.find_multiple_matches(block, patron)
|
||||
matches = scrapertools.find_multiple_matches(block, patron)
|
||||
log('MATCHES= ',matches)
|
||||
|
||||
return matches, block
|
||||
@@ -899,12 +899,12 @@ def nextPage(itemlist, item, data='', patron='', function_or_level=1, next_page=
|
||||
# If the call is direct, leave it blank
|
||||
action = inspect.stack()[function_or_level][3] if type(function_or_level) == int else function_or_level
|
||||
if next_page == '':
|
||||
next_page = scrapertoolsV2.find_single_match(data, patron)
|
||||
next_page = scrapertools.find_single_match(data, patron)
|
||||
|
||||
if next_page != "":
|
||||
if resub: next_page = re.sub(resub[0], resub[1], next_page)
|
||||
if 'http' not in next_page:
|
||||
next_page = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + next_page
|
||||
next_page = scrapertools.find_single_match(item.url, 'https?://[a-z0-9.-]+') + next_page
|
||||
next_page = re.sub('&', '&',next_page)
|
||||
log('NEXT= ', next_page)
|
||||
itemlist.append(
|
||||
|
||||
Reference in New Issue
Block a user