- nuovo metodo di override DNS
- aggiunta opzione nascondi server, se usi l'autoplay
- migliorie al codice e fix vari
This commit is contained in:
marco
2020-01-08 19:19:59 +01:00
parent d1cc659707
commit b4376525de
139 changed files with 6078 additions and 8909 deletions
-112
View File
@@ -1,112 +0,0 @@
# -*- coding: utf-8 -*-
# --------------------------------------------------------------------------------
# Cloudflare decoder
# --------------------------------------------------------------------------------
import re
import time
import urllib
import urlparse
from platformcode import logger
class Cloudflare:
def __init__(self, response):
self.timeout = 5
self.domain = urlparse.urlparse(response["url"])[1]
self.protocol = urlparse.urlparse(response["url"])[0]
self.js_data = {}
self.header_data = {}
if not "var s,t,o,p,b,r,e,a,k,i,n,g,f" in response["data"] or "chk_jschl" in response["url"]:
return
try:
self.js_data["data"] = response["data"]
self.js_data["auth_url"] = \
re.compile('<form id="challenge-form" action="([^"]+)" method="get">').findall(response["data"])[0]
self.js_data["params"] = {}
self.js_data["params"]["jschl_vc"] = \
re.compile('<input type="hidden" name="jschl_vc" value="([^"]+)"/>').findall(response["data"])[0]
self.js_data["params"]["pass"] = \
re.compile('<input type="hidden" name="pass" value="([^"]+)"/>').findall(response["data"])[0]
self.js_data["wait"] = int(re.compile("\}, ([\d]+)\);", re.MULTILINE).findall(response["data"])[0]) / 1000
self.js_data["params"]["s"] = \
re.compile('<input type="hidden" name="s" value="([^"]+)"').findall(response["data"])[0]
except:
logger.debug("Metodo #1 (javascript): NO disponible")
self.js_data = {}
if "refresh" in response["headers"]:
try:
self.header_data["wait"] = int(response["headers"]["refresh"].split(";")[0])
self.header_data["auth_url"] = response["headers"]["refresh"].split("=")[1].split("?")[0]
self.header_data["params"] = {}
self.header_data["params"]["pass"] = response["headers"]["refresh"].split("=")[2]
except:
logger.debug("Metodo #2 (headers): NO disponible")
self.header_data = {}
def solve_cf(self, body, domain):
js = re.search(
r"setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n",
body
).group(1)
js = re.sub(r"a\.value = ((.+).toFixed\(10\))?", r"\1", js)
js = re.sub(r'(e\s=\sfunction\(s\)\s{.*?};)', '', js, flags=re.DOTALL|re.MULTILINE)
js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js).replace("t.length", str(len(domain)))
js = js.replace('; 121', '')
js = re.sub(r"[\n\\']", "", js)
jsEnv = """
var t = "{domain}";
var g = String.fromCharCode;
o = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
e = function(s) {{
s += "==".slice(2 - (s.length & 3));
var bm, r = "", r1, r2, i = 0;
for (; i < s.length;) {{
bm = o.indexOf(s.charAt(i++)) << 18 | o.indexOf(s.charAt(i++)) << 12 | (r1 = o.indexOf(s.charAt(i++))) << 6 | (r2 = o.indexOf(s.charAt(i++)));
r += r1 === 64 ? g(bm >> 16 & 255) : r2 === 64 ? g(bm >> 16 & 255, bm >> 8 & 255) : g(bm >> 16 & 255, bm >> 8 & 255, bm & 255);
}}
return r;
}};
function italics (str) {{ return '<i>' + this + '</i>'; }};
var document = {{
getElementById: function () {{
return {{'innerHTML': '{innerHTML}'}};
}}
}};
{js}
"""
innerHTML = re.search('<div(?: [^<>]*)? id="([^<>]*?)">([^<>]*?)<\/div>', body , re.MULTILINE | re.DOTALL)
innerHTML = innerHTML.group(2).replace("'", r"\'") if innerHTML else ""
import js2py
from jsc import jsunc
js = jsunc(jsEnv.format(domain=domain, innerHTML=innerHTML, js=js))
def atob(s):
return base64.b64decode('{}'.format(s)).decode('utf-8')
js2py.disable_pyimport()
context = js2py.EvalJs({'atob': atob})
result = context.eval(js)
return float(result)
@property
def wait_time(self):
if self.js_data.get("wait", 0):
return self.js_data["wait"]
else:
return self.header_data.get("wait", 0)
@property
def is_cloudflare(self):
return self.header_data.get("wait", 0) > 0 or self.js_data.get("wait", 0) > 0
def get_url(self):
# Metodo #1 (javascript)
if self.js_data.get("wait", 0):
self.js_data["params"]["jschl_answer"] = self.solve_cf(self.js_data["data"], self.domain)
response = "%s://%s%s?%s" % (
self.protocol, self.domain, self.js_data["auth_url"], urllib.urlencode(self.js_data["params"]))
time.sleep(self.js_data["wait"])
return response
+113 -296
View File
@@ -17,7 +17,7 @@ from threading import Lock
from core.jsontools import to_utf8
from platformcode import config, logger
from platformcode.logger import WebErrorException
from core import scrapertoolsV2
from core import scrapertools
# Get the addon version
__version = config.get_addon_version()
@@ -48,7 +48,7 @@ def get_user_agent():
def get_url_headers(url, forced=False):
domain = urlparse.urlparse(url)[1]
sub_dom = scrapertoolsV2.find_single_match(domain, r'\.(.*?\.\w+)')
sub_dom = scrapertools.find_single_match(domain, r'\.(.*?\.\w+)')
if sub_dom and not 'google' in url:
domain = sub_dom
domain_cookies = cj._cookies.get("." + domain, {}).get("/", {})
@@ -144,34 +144,6 @@ def random_useragent():
return default_headers["User-Agent"]
def channel_proxy_list(url, forced_proxy=None):
import base64
import ast
try:
proxy_channel_bloqued_str = base64.b64decode(config.get_setting
('proxy_channel_bloqued')).decode('utf-8')
proxy_channel_bloqued = dict()
proxy_channel_bloqued = ast.literal_eval(proxy_channel_bloqued_str)
except:
logger.debug('Proxytools not initialized correctly')
return False
if not url.endswith('/'):
url += '/'
if scrapertoolsV2.find_single_match(url, r'(?:http.*\:)?\/\/(?:www\.)?([^\?|\/]+)(?:\?|\/)') \
in proxy_channel_bloqued:
if forced_proxy and forced_proxy not in ['Total', 'ProxyDirect', 'ProxyCF', 'ProxyWeb']:
if forced_proxy in proxy_channel_bloqued[scrapertoolsV2.find_single_match(url, r'(?:http.*\:)?\/\/(?:www\.)?([^\?|\/]+)(?:\?|\/)')]:
return True
else:
return False
if forced_proxy:
return True
if not 'OFF' in proxy_channel_bloqued[scrapertoolsV2.find_single_match(url, r'(?:http.*\:)?\/\/(?:www\.)?([^\?|\/]+)(?:\?|\/)')]:
return True
return False
def show_infobox(info_dict):
logger.info()
@@ -232,137 +204,6 @@ def show_infobox(info_dict):
logger.info('%s%s%s' % (box['r_dn_corner'], box['fill'] * width, box['l_dn_corner']))
return
def check_proxy(url, **opt):
proxy_data = dict()
proxy_data['dict'] = {}
proxy = opt.get('proxy', True)
proxy_web = opt.get('proxy_web', False)
proxy_addr_forced = opt.get('proxy_addr_forced', None)
forced_proxy = opt.get('forced_proxy', None)
try:
if (proxy or proxy_web) and (forced_proxy or proxy_addr_forced or
channel_proxy_list(url, forced_proxy=forced_proxy)):
import proxytools
proxy_data['addr'], proxy_data['CF_addr'], proxy_data['web_name'], \
proxy_data['log'] = proxytools.get_proxy_addr(url, post=opt.get('post', None), forced_proxy=forced_proxy)
if proxy_addr_forced and proxy_data['log']:
proxy_data['log'] = scrapertoolsV2.find_single_match(str(proxy_addr_forced), r"{'http.*':\s*'(.*?)'}")
if proxy and proxy_data['addr']:
if proxy_addr_forced: proxy_data['addr'] = proxy_addr_forced
proxy_data['dict'] = proxy_data['addr']
proxy_data['stat'] = ', Proxy Direct ' + proxy_data['log']
elif proxy and proxy_data['CF_addr']:
if proxy_addr_forced: proxy_data['CF_addr'] = proxy_addr_forced
proxy_data['dict'] = proxy_data['CF_addr']
proxy_data['stat'] = ', Proxy CF ' + proxy_data['log']
elif proxy and proxy_addr_forced:
proxy_data['addr'] = proxy_addr_forced
proxy_data['dict'] = proxy_data['addr']
proxy_data['stat'] = ', Proxy Direct ' + proxy_data['log']
elif proxy and not proxy_data['addr'] and not proxy_data['CF_addr'] \
and not proxy_addr_forced:
proxy = False
if not proxy_data['web_name']:
proxy_data['addr'], proxy_data['CF_addr'], proxy_data['web_name'], \
proxy_data['log'] = proxytools.get_proxy_addr(url, forced_proxy='Total')
if proxy_data['web_name']:
proxy_web = True
else:
proxy_web = False
if proxy_data['addr']:
proxy = True
proxy_data['dict'] = proxy_data['addr']
proxy_data['stat'] = ', Proxy Direct ' + proxy_data['log']
if proxy_web and proxy_data['web_name']:
if opt.get('post', None): proxy_data['log'] = '(POST) ' + proxy_data['log']
url, opt['post'], headers_proxy, proxy_data['web_name'] = \
proxytools.set_proxy_web(url, proxy_data['web_name'], post=opt.get('post', None))
if proxy_data['web_name']:
proxy_data['stat'] = ', Proxy Web ' + proxy_data['log']
if headers_proxy:
request_headers.update(dict(headers_proxy))
if proxy_web and not proxy_data['web_name']:
proxy_web = False
proxy_data['addr'], proxy_data['CF_addr'], proxy_data['web_name'], \
proxy_data['log'] = proxytools.get_proxy_addr(url, forced_proxy='Total')
if proxy_data['CF_addr']:
proxy = True
proxy_data['dict'] = proxy_data['CF_addr']
proxy_data['stat'] = ', Proxy CF ' + proxy_data['log']
elif proxy_data['addr']:
proxy = True
proxy_data['dict'] = proxy_data['addr']
proxy_data['stat'] = ', Proxy Direct ' + proxy_data['log']
except:
import traceback
logger.error(traceback.format_exc())
opt['proxy'] = ''
opt['proxy_web'] = ''
proxy_data['stat'] = ''
proxy_data['addr'] = ''
proxy_data['CF_addr'] = ''
proxy_data['dict'] = {}
proxy_data['web_name'] = ''
proxy_data['log'] = ''
url = opt['url_save']
try:
proxy_data['addr']['https'] = str('https://'+ proxy_data['addr']['https'])
except:
pass
return url, proxy_data, opt
def proxy_post_processing(url, proxy_data, response, opt):
opt['out_break'] = False
try:
if ', Proxy Web' in proxy_data.get('stat', ''):
import proxytools
response["data"] = proxytools.restore_after_proxy_web(response["data"],
proxy_data['web_name'], opt['url_save'])
if response["data"] == 'ERROR':
response['sucess'] = False
if response["code"] == 302:
proxy_data['stat'] = ', Proxy Direct'
opt['forced_proxy'] = 'ProxyDirect'
url = opt['url_save']
opt['post'] = opt['post_save']
response['sucess'] = False
if proxy_data.get('stat', '') and response['sucess'] == False and \
opt.get('proxy_retries_counter', 0) <= opt.get('proxy_retries', 1) and opt.get('count_retries_tot', 5) > 1:
import proxytools
if ', Proxy Direct' in proxy_data.get('stat', ''):
proxytools.get_proxy_list_method(proxy_init='ProxyDirect',
error_skip=proxy_data['addr'], url_test=url)
elif ', Proxy CF' in proxy_data.get('stat', ''):
proxytools.get_proxy_list_method(proxy_init='ProxyCF',
error_skip=proxy_data['CF_addr'])
url = opt['url_save']
elif ', Proxy Web' in proxy_data.get('stat', ''):
if channel_proxy_list(opt['url_save'], forced_proxy=proxy_data['web_name']):
opt['forced_proxy'] = 'ProxyCF'
url =opt['url_save']
opt['post'] = opt['post_save']
else:
proxytools.get_proxy_list_method(proxy_init='ProxyWeb',
error_skip=proxy_data['web_name'])
url =opt['url_save']
opt['post'] = opt['post_save']
else:
opt['out_break'] = True
except:
import traceback
logger.error(traceback.format_exc())
opt['out_break'] = True
return response["data"], response['sucess'], url, opt
def downloadpage(url, **opt):
@@ -410,29 +251,21 @@ def downloadpage(url, **opt):
"""
load_cookies()
# if scrapertoolsV2.get_domain_from_url(url) in ['www.seriehd.moda', 'wstream.video', 'www.guardaserie.media', 'akvideo.stream','www.piratestreaming.top']: # cloudflare urls
# if opt.get('session', False):
# session = opt['session'] # same session to speed up search
# else:
# from lib import cloudscraper
# session = cloudscraper.create_scraper()
# else:
# from lib import requests
# session = requests.session()
if opt.get('session', False):
session = opt['session'] # same session to speed up search
logger.info('same session')
elif opt.get('use_requests', False):
from lib import requests
session = requests.session()
else:
if urlparse.urlparse(url).netloc in ['www.guardaserie.media', 'casacinema.space']:
from lib import cloudscraper
session = cloudscraper.create_scraper()
elif opt.get('session', False):
session = opt['session'] # same session to speed up search
logger.info('same session')
elif config.get_setting('resolver_dns') and not opt.get('use_requests', False):
from specials import resolverdns
session = resolverdns.session()
else:
from lib import requests
session = requests.session()
# Headers by default, if nothing is specified
req_headers = default_headers.copy()
verify = opt.get('verify', True)
# Headers passed as parameters
if opt.get('headers', None) is not None:
@@ -445,148 +278,132 @@ def downloadpage(url, **opt):
req_headers['User-Agent'] = random_useragent()
url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")
opt['proxy_retries_counter'] = 0
opt['url_save'] = url
opt['post_save'] = opt.get('post', None)
while opt['proxy_retries_counter'] <= opt.get('proxy_retries', 1):
response = {}
info_dict = []
payload = dict()
files = {}
file_name = ''
opt['proxy_retries_counter'] += 1
response = {}
info_dict = []
payload = dict()
files = {}
file_name = ''
session.verify = opt.get('verify', True)
session.verify = opt.get('verify', verify)
if opt.get('cookies', True):
session.cookies = cj
session.headers.update(req_headers)
if opt.get('cookies', True):
session.cookies = cj
session.headers.update(req_headers)
# Prepare the url in case you need a proxy, or if proxies are sent from the channel
# url, proxy_data, opt = check_proxy(url, **opt)
# if opt.get('proxies', None) is not None:
# session.proxies = opt['proxies']
# elif proxy_data.get('dict', {}):
# session.proxies = proxy_data['dict']
proxy_data = {'dict': {}}
proxy_data = {'dict': {}}
inicio = time.time()
inicio = time.time()
if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
if opt['timeout'] == 0: opt['timeout'] = None
if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
if opt['timeout'] == 0: opt['timeout'] = None
if len(url) > 0:
try:
if opt.get('post', None) is not None or opt.get('file', None) is not None:
if opt.get('post', None) is not None:
# Convert string post in dict
try:
json.loads(opt['post'])
payload = opt['post']
except:
if not isinstance(opt['post'], dict):
post = urlparse.parse_qs(opt['post'], keep_blank_values=1)
payload = dict()
if len(url) > 0:
try:
if opt.get('post', None) is not None or opt.get('file', None) is not None:
if opt.get('post', None) is not None:
# Convert string post in dict
try:
json.loads(opt['post'])
payload = opt['post']
except:
if not isinstance(opt['post'], dict):
post = urlparse.parse_qs(opt['post'], keep_blank_values=1)
payload = dict()
for key, value in post.items():
try:
payload[key] = value[0]
except:
payload[key] = ''
else:
payload = opt['post']
# Verify 'file' and 'file_name' options to upload a buffer or file
if opt.get('file', None) is not None:
if os.path.isfile(opt['file']):
if opt.get('file_name', None) is None:
path_file, opt['file_name'] = os.path.split(opt['file'])
files = {'file': (opt['file_name'], open(opt['file'], 'rb'))}
file_name = opt['file']
for key, value in post.items():
try:
payload[key] = value[0]
except:
payload[key] = ''
else:
files = {'file': (opt.get('file_name', 'Default'), opt['file'])}
file_name = opt.get('file_name', 'Default') + ', Buffer de memoria'
payload = opt['post']
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
if opt.get('only_headers', False):
# Makes the request with HEAD method
req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
timeout=opt['timeout'])
# Verify 'file' and 'file_name' options to upload a buffer or file
if opt.get('file', None) is not None:
if os.path.isfile(opt['file']):
if opt.get('file_name', None) is None:
path_file, opt['file_name'] = os.path.split(opt['file'])
files = {'file': (opt['file_name'], open(opt['file'], 'rb'))}
file_name = opt['file']
else:
# Makes the request with POST method
req = session.post(url, data=payload, allow_redirects=opt.get('follow_redirects', True),
files=files, timeout=opt['timeout'])
files = {'file': (opt.get('file_name', 'Default'), opt['file'])}
file_name = opt.get('file_name', 'Default') + ', Buffer de memoria'
elif opt.get('only_headers', False):
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
if opt.get('only_headers', False):
# Makes the request with HEAD method
req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
timeout=opt['timeout'])
else:
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
# Makes the request with GET method
req = session.get(url, allow_redirects=opt.get('follow_redirects', True),
timeout=opt['timeout'])
# Makes the request with POST method
req = session.post(url, data=payload, allow_redirects=opt.get('follow_redirects', True),
files=files, timeout=opt['timeout'])
except Exception as e:
from lib import requests
if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
req = requests.Response()
response['data'] = ''
response['sucess'] = False
info_dict.append(('Success', 'False'))
response['code'] = str(e)
info_dict.append(('Response code', str(e)))
info_dict.append(('Finalizado en', time.time() - inicio))
if not opt.get('alfa_s', False):
show_infobox(info_dict)
return type('HTTPResponse', (), response)
else:
req = requests.Response()
req.status_code = str(e)
elif opt.get('only_headers', False):
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
# Makes the request with HEAD method
req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
timeout=opt['timeout'])
else:
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
# Makes the request with GET method
req = session.get(url, allow_redirects=opt.get('follow_redirects', True),
timeout=opt['timeout'])
except Exception as e:
from lib import requests
if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
response['data'] = ''
response['sucess'] = False
info_dict.append(('Success', 'False'))
response['code'] = str(e)
info_dict.append(('Response code', str(e)))
info_dict.append(('Finalizado en', time.time() - inicio))
if not opt.get('alfa_s', False):
show_infobox(info_dict)
return type('HTTPResponse', (), response)
else:
req = requests.Response()
req.status_code = str(e)
else:
response['data'] = ''
response['sucess'] = False
response['code'] = ''
return type('HTTPResponse', (), response)
else:
response['data'] = ''
response['sucess'] = False
response['code'] = ''
return type('HTTPResponse', (), response)
response_code = req.status_code
response_code = req.status_code
response['data'] = req.content
response['url'] = req.url
if not response['data']:
response['data'] = ''
try:
response['json'] = to_utf8(req.json())
except:
response['json'] = dict()
response['code'] = response_code
response['headers'] = req.headers
response['cookies'] = req.cookies
response['data'] = req.content
response['url'] = req.url
if not response['data']:
response['data'] = ''
try:
response['json'] = to_utf8(req.json())
except:
response['json'] = dict()
response['code'] = response_code
response['headers'] = req.headers
response['cookies'] = req.cookies
info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)
info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)
if opt.get('cookies', True):
save_cookies(alfa_s=opt.get('alfa_s', False))
if opt.get('cookies', True):
save_cookies(alfa_s=opt.get('alfa_s', False))
# is_channel = inspect.getmodule(inspect.currentframe().f_back)
# is_channel = scrapertoolsV2.find_single_match(str(is_channel), "<module '(channels).*?'")
# if is_channel and isinstance(response_code, int):
# if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
# if response_code > 399:
# show_infobox(info_dict)
# raise WebErrorException(urlparse.urlparse(url)[1])
# is_channel = inspect.getmodule(inspect.currentframe().f_back)
# is_channel = scrapertools.find_single_match(str(is_channel), "<module '(channels).*?'")
# if is_channel and isinstance(response_code, int):
# if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
# if response_code > 399:
# show_infobox(info_dict)
# raise WebErrorException(urlparse.urlparse(url)[1])
if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
show_infobox(info_dict)
# If there is a proxy error, refresh the list and retry the number indicated in proxy_retries
# response['data'], response['sucess'], url, opt = proxy_post_processing(url, proxy_data, response, opt)
# if opt.get('out_break', False):
# break
if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
show_infobox(info_dict)
return type('HTTPResponse', (), response)
File diff suppressed because one or more lines are too long
+76 -82
View File
@@ -1,27 +1,17 @@
# -*- coding: utf-8 -*-
# --------------------------------------------------------------------------------
# Scraper tools for reading and processing web elements
# Scraper tools v2 for reading and processing web elements
# --------------------------------------------------------------------------------
import re
import time
from core import httptools
import urlparse
from core.entities import html5
from platformcode import logger
def get_header_from_response(url, header_to_get="", post=None, headers=None):
header_to_get = header_to_get.lower()
response = httptools.downloadpage(url, post=post, headers=headers, only_headers=True)
return response.headers.get(header_to_get)
def read_body_and_headers(url, post=None, headers=None, follow_redirects=False, timeout=None):
response = httptools.downloadpage(url, post=post, headers=headers, follow_redirects=follow_redirects,
timeout=timeout)
return response.data, response.headers
def printMatches(matches):
i = 0
for match in matches:
@@ -42,8 +32,37 @@ def find_multiple_matches(text, pattern):
return re.findall(pattern, text, re.DOTALL)
def entityunescape(cadena):
return unescape(cadena)
def find_multiple_matches_groups(text, pattern):
r = re.compile(pattern)
return [m.groupdict() for m in r.finditer(text)]
# Convierte los codigos html "&ntilde;" y lo reemplaza por "ñ" caracter unicode utf-8
def decodeHtmlentities(data):
entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8})(;?)")
def substitute_entity(match):
ent = match.group(2) + match.group(3)
res = ""
while not ent in html5 and not ent.endswith(";") and match.group(1) != "#":
# Excepción para cuando '&' se usa como argumento en la urls contenidas en los datos
try:
res = ent[-1] + res
ent = ent[:-1]
except:
break
if match.group(1) == "#":
ent = unichr(int(ent.replace(";", "")))
return ent.encode('utf-8')
else:
cp = html5.get(ent)
if cp:
return cp.decode("unicode-escape").encode('utf-8') + res
else:
return match.group()
return entity_re.subn(substitute_entity, data)[0]
def unescape(text):
@@ -84,47 +103,6 @@ def unescape(text):
# Convierte los codigos html "&ntilde;" y lo reemplaza por "ñ" caracter unicode utf-8
def decodeHtmlentities(string):
string = entitiesfix(string)
entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});")
def substitute_entity(match):
from htmlentitydefs import name2codepoint as n2cp
ent = match.group(2)
if match.group(1) == "#":
return unichr(int(ent)).encode('utf-8')
else:
cp = n2cp.get(ent)
if cp:
return unichr(cp).encode('utf-8')
else:
return match.group()
return entity_re.subn(substitute_entity, string)[0]
def entitiesfix(string):
# Las entidades comienzan siempre con el símbolo & , y terminan con un punto y coma ( ; ).
string = string.replace("&aacute", "&aacute;")
string = string.replace("&eacute", "&eacute;")
string = string.replace("&iacute", "&iacute;")
string = string.replace("&oacute", "&oacute;")
string = string.replace("&uacute", "&uacute;")
string = string.replace("&Aacute", "&Aacute;")
string = string.replace("&Eacute", "&Eacute;")
string = string.replace("&Iacute", "&Iacute;")
string = string.replace("&Oacute", "&Oacute;")
string = string.replace("&Uacute", "&Uacute;")
string = string.replace("&uuml", "&uuml;")
string = string.replace("&Uuml", "&Uuml;")
string = string.replace("&ntilde", "&ntilde;")
string = string.replace("&#191", "&#191;")
string = string.replace("&#161", "&#161;")
string = string.replace(";;", ";")
return string
def htmlclean(cadena):
cadena = re.compile("<!--.*?-->", re.DOTALL).sub("", cadena)
@@ -226,7 +204,7 @@ def htmlclean(cadena):
cadena = re.compile("<link[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("\t", "")
cadena = entityunescape(cadena)
# cadena = entityunescape(cadena)
return cadena
@@ -314,8 +292,8 @@ def remove_show_from_title(title, show):
return title
# scrapertools.get_filename_from_url(media_url)[-4:]
def get_filename_from_url(url):
import urlparse
parsed_url = urlparse.urlparse(url)
try:
filename = parsed_url.path
@@ -332,19 +310,18 @@ def get_filename_from_url(url):
return filename
# def get_domain_from_url(url):
# import urlparse
# parsed_url = urlparse.urlparse(url)
# try:
# filename = parsed_url.netloc
# except:
# # Si falla es porque la implementación de parsed_url no reconoce los atributos como "path"
# if len(parsed_url) >= 4:
# filename = parsed_url[1]
# else:
# filename = ""
#
# return filename
def get_domain_from_url(url):
parsed_url = urlparse.urlparse(url)
try:
filename = parsed_url.netloc
except:
# Si falla es porque la implementación de parsed_url no reconoce los atributos como "path"
if len(parsed_url) >= 4:
filename = parsed_url[1]
else:
filename = ""
return filename
def get_season_and_episode(title):
@@ -365,22 +342,15 @@ def get_season_and_episode(title):
@return: Numero de temporada y episodio en formato "1x01" o cadena vacia si no se han encontrado
"""
filename = ""
# 4l3x87 - fix for series example 9-1-1
# original_title = title
# title = title.replace('9-1-1','')
patrons = ["(\d+)\s*[x-]\s*(\d+)", "(\d+)\s*×\s*(\d+)", "(?:s|t)(\d+)e(\d+)",
"(?:season|temp|stagione\w*)\s*(\d+)\s*(?:capitulo|epi|episode|episodio\w*)\s*(\d+)"]
patrons = ["(\d+)x(\d+)", "(?:s|t)(\d+)e(\d+)",
"(?:season|temp\w*)\s*(\d+)\s*(?:capitulo|epi\w*)\s*(\d+)"]
for patron in patrons:
try:
matches = re.compile(patron, re.I).search(title)
if matches:
if len(matches.group(1)) == 1:
filename = matches.group(1) + "x" + matches.group(2).zfill(2)
else:
filename = matches.group(1).lstrip('0') + "x" + matches.group(2).zfill(2)
filename = matches.group(1) + "x" + matches.group(2).zfill(2)
break
except:
pass
@@ -388,3 +358,27 @@ def get_season_and_episode(title):
logger.info("'" + title + "' -> '" + filename + "'")
return filename
def get_sha1(cadena):
try:
import hashlib
devuelve = hashlib.sha1(cadena).hexdigest()
except:
import sha
import binascii
devuelve = binascii.hexlify(sha.new(cadena).digest())
return devuelve
def get_md5(cadena):
try:
import hashlib
devuelve = hashlib.md5(cadena).hexdigest()
except:
import md5
import binascii
devuelve = binascii.hexlify(md5.new(cadena).digest())
return devuelve
-346
View File
@@ -1,346 +0,0 @@
# -*- coding: utf-8 -*-
# --------------------------------------------------------------------------------
# Scraper tools v2 for reading and processing web elements
# --------------------------------------------------------------------------------
import re
import time
import urlparse
from core.entities import html5
from platformcode import logger
def printMatches(matches):
i = 0
for match in matches:
logger.info("%d %s" % (i, match))
i = i + 1
def find_single_match(data, patron, index=0):
try:
matches = re.findall(patron, data, flags=re.DOTALL)
return matches[index]
except:
return ""
# Parse string and extracts multiple matches using regular expressions
def find_multiple_matches(text, pattern):
return re.findall(pattern, text, re.DOTALL)
def find_multiple_matches_groups(text, pattern):
r = re.compile(pattern)
return [m.groupdict() for m in r.finditer(text)]
# Convierte los codigos html "&ntilde;" y lo reemplaza por "ñ" caracter unicode utf-8
def decodeHtmlentities(data):
entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8})(;?)")
def substitute_entity(match):
ent = match.group(2) + match.group(3)
res = ""
while not ent in html5 and not ent.endswith(";") and match.group(1) != "#":
# Excepción para cuando '&' se usa como argumento en la urls contenidas en los datos
try:
res = ent[-1] + res
ent = ent[:-1]
except:
break
if match.group(1) == "#":
ent = unichr(int(ent.replace(";", "")))
return ent.encode('utf-8')
else:
cp = html5.get(ent)
if cp:
return cp.decode("unicode-escape").encode('utf-8') + res
else:
return match.group()
return entity_re.subn(substitute_entity, data)[0]
def htmlclean(cadena):
cadena = re.compile("<!--.*?-->", re.DOTALL).sub("", cadena)
cadena = cadena.replace("<center>", "")
cadena = cadena.replace("</center>", "")
cadena = cadena.replace("<cite>", "")
cadena = cadena.replace("</cite>", "")
cadena = cadena.replace("<em>", "")
cadena = cadena.replace("</em>", "")
cadena = cadena.replace("<u>", "")
cadena = cadena.replace("</u>", "")
cadena = cadena.replace("<li>", "")
cadena = cadena.replace("</li>", "")
cadena = cadena.replace("<turl>", "")
cadena = cadena.replace("</tbody>", "")
cadena = cadena.replace("<tr>", "")
cadena = cadena.replace("</tr>", "")
cadena = cadena.replace("<![CDATA[", "")
cadena = cadena.replace("<wbr>", "")
cadena = cadena.replace("<Br />", " ")
cadena = cadena.replace("<BR />", " ")
cadena = cadena.replace("<Br>", " ")
cadena = re.compile("<br[^>]*>", re.DOTALL).sub(" ", cadena)
cadena = re.compile("<script.*?</script>", re.DOTALL).sub("", cadena)
cadena = re.compile("<option[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</option>", "")
cadena = re.compile("<button[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</button>", "")
cadena = re.compile("<i[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</iframe>", "")
cadena = cadena.replace("</i>", "")
cadena = re.compile("<table[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</table>", "")
cadena = re.compile("<td[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</td>", "")
cadena = re.compile("<div[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</div>", "")
cadena = re.compile("<dd[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</dd>", "")
cadena = re.compile("<b[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</b>", "")
cadena = re.compile("<font[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</font>", "")
cadena = re.compile("<strong[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</strong>", "")
cadena = re.compile("<small[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</small>", "")
cadena = re.compile("<span[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</span>", "")
cadena = re.compile("<a[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</a>", "")
cadena = re.compile("<p[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</p>", "")
cadena = re.compile("<ul[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</ul>", "")
cadena = re.compile("<h1[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</h1>", "")
cadena = re.compile("<h2[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</h2>", "")
cadena = re.compile("<h3[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</h3>", "")
cadena = re.compile("<h4[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</h4>", "")
cadena = re.compile("<!--[^-]+-->", re.DOTALL).sub("", cadena)
cadena = re.compile("<img[^>]*>", re.DOTALL).sub("", cadena)
cadena = re.compile("<object[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</object>", "")
cadena = re.compile("<param[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</param>", "")
cadena = re.compile("<embed[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</embed>", "")
cadena = re.compile("<title[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("</title>", "")
cadena = re.compile("<link[^>]*>", re.DOTALL).sub("", cadena)
cadena = cadena.replace("\t", "")
# cadena = entityunescape(cadena)
return cadena
def slugify(title):
# print title
# Sustituye acentos y eñes
title = title.replace("Á", "a")
title = title.replace("É", "e")
title = title.replace("Í", "i")
title = title.replace("Ó", "o")
title = title.replace("Ú", "u")
title = title.replace("á", "a")
title = title.replace("é", "e")
title = title.replace("í", "i")
title = title.replace("ó", "o")
title = title.replace("ú", "u")
title = title.replace("À", "a")
title = title.replace("È", "e")
title = title.replace("Ì", "i")
title = title.replace("Ò", "o")
title = title.replace("Ù", "u")
title = title.replace("à", "a")
title = title.replace("è", "e")
title = title.replace("ì", "i")
title = title.replace("ò", "o")
title = title.replace("ù", "u")
title = title.replace("ç", "c")
title = title.replace("Ç", "C")
title = title.replace("Ñ", "n")
title = title.replace("ñ", "n")
title = title.replace("/", "-")
title = title.replace("&amp;", "&")
# Pasa a minúsculas
title = title.lower().strip()
# Elimina caracteres no válidos
validchars = "abcdefghijklmnopqrstuvwxyz1234567890- "
title = ''.join(c for c in title if c in validchars)
# Sustituye espacios en blanco duplicados y saltos de línea
title = re.compile("\s+", re.DOTALL).sub(" ", title)
# Sustituye espacios en blanco por guiones
title = re.compile("\s", re.DOTALL).sub("-", title.strip())
# Sustituye espacios en blanco duplicados y saltos de línea
title = re.compile("\-+", re.DOTALL).sub("-", title)
# Arregla casos especiales
if title.startswith("-"):
title = title[1:]
if title == "":
title = "-" + str(time.time())
return title
def remove_htmltags(string):
return re.sub('<[^<]+?>', '', string)
def remove_show_from_title(title, show):
# print slugify(title)+" == "+slugify(show)
# Quita el nombre del programa del título
if slugify(title).startswith(slugify(show)):
# Convierte a unicode primero, o el encoding se pierde
title = unicode(title, "utf-8", "replace")
show = unicode(show, "utf-8", "replace")
title = title[len(show):].strip()
if title.startswith("-"):
title = title[1:].strip()
if title == "":
title = str(time.time())
# Vuelve a utf-8
title = title.encode("utf-8", "ignore")
show = show.encode("utf-8", "ignore")
return title
# scrapertools.get_filename_from_url(media_url)[-4:]
def get_filename_from_url(url):
parsed_url = urlparse.urlparse(url)
try:
filename = parsed_url.path
except:
# Si falla es porque la implementación de parsed_url no reconoce los atributos como "path"
if len(parsed_url) >= 4:
filename = parsed_url[2]
else:
filename = ""
if "/" in filename:
filename = filename.split("/")[-1]
return filename
def get_domain_from_url(url):
parsed_url = urlparse.urlparse(url)
try:
filename = parsed_url.netloc
except:
# Si falla es porque la implementación de parsed_url no reconoce los atributos como "path"
if len(parsed_url) >= 4:
filename = parsed_url[1]
else:
filename = ""
return filename
def get_season_and_episode(title):
"""
Retorna el numero de temporada y de episodio en formato "1x01" obtenido del titulo de un episodio
Ejemplos de diferentes valores para title y su valor devuelto:
"serie 101x1.strm", "s101e1.avi", "t101e1.avi" -> '101x01'
"Name TvShow 1x6.avi" -> '1x06'
"Temp 3 episodio 2.avi" -> '3x02'
"Alcantara season 13 episodie 12.avi" -> '13x12'
"Temp1 capitulo 14" -> '1x14'
"Temporada 1: El origen Episodio 9" -> '' (entre el numero de temporada y los episodios no puede haber otro texto)
"Episodio 25: titulo episodio" -> '' (no existe el numero de temporada)
"Serie X Temporada 1" -> '' (no existe el numero del episodio)
@type title: str
@param title: titulo del episodio de una serie
@rtype: str
@return: Numero de temporada y episodio en formato "1x01" o cadena vacia si no se han encontrado
"""
filename = ""
patrons = ["(\d+)x(\d+)", "(?:s|t)(\d+)e(\d+)",
"(?:season|temp\w*)\s*(\d+)\s*(?:capitulo|epi\w*)\s*(\d+)"]
for patron in patrons:
try:
matches = re.compile(patron, re.I).search(title)
if matches:
filename = matches.group(1) + "x" + matches.group(2).zfill(2)
break
except:
pass
logger.info("'" + title + "' -> '" + filename + "'")
return filename
def get_sha1(cadena):
try:
import hashlib
devuelve = hashlib.sha1(cadena).hexdigest()
except:
import sha
import binascii
devuelve = binascii.hexlify(sha.new(cadena).digest())
return devuelve
def get_md5(cadena):
try:
import hashlib
devuelve = hashlib.md5(cadena).hexdigest()
except:
import md5
import binascii
devuelve = binascii.hexlify(md5.new(cadena).digest())
return devuelve
+2 -2
View File
@@ -506,8 +506,8 @@ def get_server_json(server_name):
def get_server_host(server_name):
from core import scrapertoolsV2
return [scrapertoolsV2.get_domain_from_url(pattern['url']) for pattern in get_server_json(server_name)['find_videos']['patterns']]
from core import scrapertools
return [scrapertools.get_domain_from_url(pattern['url']) for pattern in get_server_json(server_name)['find_videos']['patterns']]
def get_server_controls_settings(server_name):
+60 -36
View File
@@ -10,7 +10,7 @@ import urlparse
import xbmcaddon
from channelselector import thumb
from core import httptools, scrapertoolsV2, servertools, tmdb, channeltools
from core import httptools, scrapertools, servertools, tmdb, channeltools
from core.item import Item
from lib import unshortenit
from platformcode import logger, config
@@ -21,7 +21,7 @@ def hdpass_get_servers(item):
itemlist = []
data = httptools.downloadpage(item.url).data.replace('\n', '')
patron = r'<iframe(?: id="[^"]+")? width="[^"]+" height="[^"]+" src="([^"]+)"[^>]+><\/iframe>'
url = scrapertoolsV2.find_single_match(data, patron).replace("?alta", "")
url = scrapertools.find_single_match(data, patron).replace("?alta", "")
url = url.replace("&download=1", "")
if 'https' not in url:
url = 'https:' + url
@@ -37,20 +37,20 @@ def hdpass_get_servers(item):
patron_mir = '<div class="row mobileMirrs">(.*?)</div>'
patron_media = r'<input type="hidden" name="urlEmbed" data-mirror="([^"]+)" id="urlEmbed"\s*value="([^"]+)"\s*/>'
res = scrapertoolsV2.find_single_match(data, patron_res)
res = scrapertools.find_single_match(data, patron_res)
itemlist = []
for res_url, res_video in scrapertoolsV2.find_multiple_matches(res, '<option.*?value="([^"]+?)">([^<]+?)</option>'):
for res_url, res_video in scrapertools.find_multiple_matches(res, '<option.*?value="([^"]+?)">([^<]+?)</option>'):
data = httptools.downloadpage(urlparse.urljoin(url, res_url)).data.replace('\n', '')
mir = scrapertoolsV2.find_single_match(data, patron_mir)
mir = scrapertools.find_single_match(data, patron_mir)
for mir_url, srv in scrapertoolsV2.find_multiple_matches(mir, '<option.*?value="([^"]+?)">([^<]+?)</value>'):
for mir_url, srv in scrapertools.find_multiple_matches(mir, '<option.*?value="([^"]+?)">([^<]+?)</value>'):
data = httptools.downloadpage(urlparse.urljoin(url, mir_url)).data.replace('\n', '')
for media_label, media_url in scrapertoolsV2.find_multiple_matches(data, patron_media):
for media_label, media_url in scrapertools.find_multiple_matches(data, patron_media):
itemlist.append(Item(channel=item.channel,
action="play",
fulltitle=item.fulltitle,
@@ -168,13 +168,13 @@ def scrapeLang(scraped, lang, longtitle):
return language, longtitle
def cleantitle(title):
cleantitle = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(title).replace('"', "'").replace('×', 'x').replace('', '-')).strip()
cleantitle = scrapertools.htmlclean(scrapertools.decodeHtmlentities(title).replace('"', "'").replace('×', 'x').replace('', '-')).strip()
return cleantitle
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang):
itemlist = []
log("scrapeBlock qui", block, patron)
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
log("scrapeBlock qui")
matches = scrapertools.find_multiple_matches_groups(block, patron)
log('MATCHES =', matches)
if debug:
@@ -214,7 +214,7 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
for kk in known_keys:
val = match[listGroups.index(kk)] if kk in listGroups else ''
if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
val = scrapertools.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
scraped[kk] = val
if scraped['season']:
@@ -227,7 +227,7 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
episode = ''
else:
episode = re.sub(r'\s-\s|-|x|&#8211|&#215;|×', 'x', scraped['episode']) if scraped['episode'] else ''
second_episode = scrapertoolsV2.find_single_match(episode,'x\d+x(\d+)')
second_episode = scrapertools.find_single_match(episode, 'x\d+x(\d+)')
if second_episode: episode = re.sub(r'(\d+x\d+)x\d+',r'\1-', episode) + second_episode.zfill(2)
#episode = re.sub(r'\s-\s|-|x|&#8211|&#215;', 'x', scraped['episode']) if scraped['episode'] else ''
@@ -257,18 +257,18 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
if scraped["plot"]:
infolabels['plot'] = plot
if scraped['duration']:
matches = scrapertoolsV2.find_multiple_matches(scraped['duration'],
matches = scrapertools.find_multiple_matches(scraped['duration'],
r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
for h, m in matches:
scraped['duration'] = int(h) * 60 + int(m)
if not matches:
scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
scraped['duration'] = scrapertools.find_single_match(scraped['duration'], r'(\d+)')
infolabels['duration'] = int(scraped['duration']) * 60
if scraped['genere']:
genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
genres = scrapertools.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
infolabels['genere'] = ", ".join(genres)
if scraped["rating"]:
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
infolabels['rating'] = scrapertools.decodeHtmlentities(scraped["rating"])
AC = CT = ''
if typeContentDict:
@@ -377,7 +377,18 @@ def scrape(func):
log('PATRON= ', patron)
if not data:
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, session=item.session).data.replace("'", '"')
page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, session=item.session)
# if url may be changed and channel has findhost to update
if (not page.data or scrapertools.get_domain_from_url(page.url) != scrapertools.get_domain_from_url(item.url)) and 'findhost' in func.__globals__:
host = func.__globals__['findhost']()
parse = list(urlparse.urlparse(item.url))
from core import jsontools
jsontools.update_node(host, func.__module__.split('.')[-1], 'url')
parse[1] = scrapertools.get_domain_from_url(host)
item.url = urlparse.urlunparse(parse)
page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True,
session=item.session)
data = page.data.replace("'", '"')
data = re.sub('\n|\t', ' ', data)
data = re.sub(r'>\s+<', '> <', data)
# replace all ' with " and eliminate newline, so we don't need to worry about
@@ -385,7 +396,7 @@ def scrape(func):
if patronBlock:
if debugBlock:
regexDbg(item, patronBlock, headers, data)
blocks = scrapertoolsV2.find_multiple_matches_groups(data, patronBlock)
blocks = scrapertools.find_multiple_matches_groups(data, patronBlock)
block = ""
for bl in blocks:
# log(len(blocks),bl)
@@ -434,7 +445,7 @@ def scrape(func):
if anime:
if function == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
else: autorenumber.renumber(itemlist)
if anime and autorenumber.check(item) == False and not scrapertoolsV2.find_single_match(itemlist[0].title, r'(\d+.\d+)'):
if anime and autorenumber.check(item) == False and not scrapertools.find_single_match(itemlist[0].title, r'(\d+.\d+)'):
pass
else:
if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
@@ -462,7 +473,7 @@ def dooplay_get_links(item, host):
data = httptools.downloadpage(item.url).data.replace("'", '"')
patron = r'<li id="player-option-[0-9]".*?data-type="([^"]+)" data-post="([^"]+)" data-nume="([^"]+)".*?<span class="title".*?>([^<>]+)</span>(?:<span class="server">([^<>]+))?'
matches = scrapertoolsV2.find_multiple_matches(data, patron)
matches = scrapertools.find_multiple_matches(data, patron)
ret = []
@@ -474,7 +485,7 @@ def dooplay_get_links(item, host):
"type": type
})
dataAdmin = httptools.downloadpage(host + '/wp-admin/admin-ajax.php', post=postData,headers={'Referer': item.url}).data
link = scrapertoolsV2.find_single_match(dataAdmin, "<iframe.*src='([^']+)'")
link = scrapertools.find_single_match(dataAdmin, "<iframe.*src='([^']+)'")
ret.append({
'url': link,
'title': title,
@@ -551,25 +562,25 @@ def swzz_get_url(item):
if "/link/" in item.url:
data = httptools.downloadpage(item.url, headers=headers).data
if "link =" in data:
data = scrapertoolsV2.find_single_match(data, 'link = "([^"]+)"')
data = scrapertools.find_single_match(data, 'link = "([^"]+)"')
if 'http' not in data:
data = 'https:' + data
else:
match = scrapertoolsV2.find_single_match(data, r'<meta name="og:url" content="([^"]+)"')
match = scrapertoolsV2.find_single_match(data, r'URL=([^"]+)">') if not match else match
match = scrapertools.find_single_match(data, r'<meta name="og:url" content="([^"]+)"')
match = scrapertools.find_single_match(data, r'URL=([^"]+)">') if not match else match
if not match:
from lib import jsunpack
try:
data = scrapertoolsV2.find_single_match(data.replace('\n', ''), r"(eval\s?\(function\(p,a,c,k,e,d.*?)</script>")
data = scrapertools.find_single_match(data.replace('\n', ''), r"(eval\s?\(function\(p,a,c,k,e,d.*?)</script>")
data = jsunpack.unpack(data)
logger.debug("##### play /link/ unpack ##\n%s\n##" % data)
except:
logger.debug("##### The content is yet unpacked ##\n%s\n##" % data)
data = scrapertoolsV2.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";')
data = scrapertools.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";')
data, c = unshortenit.unwrap_30x_only(data)
else:
data = match
@@ -626,8 +637,8 @@ def menu(func):
item = args['item']
host = func.__globals__['host']
list_servers = func.__globals__['list_servers']
list_quality = func.__globals__['list_quality']
list_servers = func.__globals__['list_servers'] if 'list_servers' in func.__globals__ else 'directo'
list_quality = func.__globals__['list_quality'] if 'list_quality' in func.__globals__ else 'default'
filename = func.__module__.split('.')[1]
global_search = False
# listUrls = ['film', 'filmSub', 'tvshow', 'tvshowSub', 'anime', 'animeSub', 'search', 'top', 'topSub']
@@ -744,7 +755,7 @@ def typo(string, typography=''):
if 'submenu' in string:
string = u"\u2022\u2022 ".encode('utf-8') + re.sub(r'\ssubmenu','',string)
if 'color' in string:
color = scrapertoolsV2.find_single_match(string,'color ([a-z]+)')
color = scrapertools.find_single_match(string, 'color ([a-z]+)')
if color == 'kod' or '': color = kod_color
string = '[COLOR '+ color +']' + re.sub(r'\scolor\s([a-z]+)','',string) + '[/COLOR]'
if 'bold' in string:
@@ -776,13 +787,13 @@ def match(item, patron='', patronBlock='', headers='', url='', post=''):
log('DATA= ', data)
if patronBlock:
block = scrapertoolsV2.find_single_match(data, patronBlock)
block = scrapertools.find_single_match(data, patronBlock)
log('BLOCK= ',block)
else:
block = data
if patron:
matches = scrapertoolsV2.find_multiple_matches(block, patron)
matches = scrapertools.find_multiple_matches(block, patron)
log('MATCHES= ',matches)
return matches, block
@@ -890,12 +901,12 @@ def nextPage(itemlist, item, data='', patron='', function_or_level=1, next_page=
# If the call is direct, leave it blank
action = inspect.stack()[function_or_level][3] if type(function_or_level) == int else function_or_level
if next_page == '':
next_page = scrapertoolsV2.find_single_match(data, patron)
next_page = scrapertools.find_single_match(data, patron)
if next_page != "":
if resub: next_page = re.sub(resub[0], resub[1], next_page)
if 'http' not in next_page:
next_page = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + next_page
next_page = scrapertools.find_single_match(item.url, 'https?://[a-z0-9.-]+') + next_page
next_page = re.sub('&amp;', '&',next_page)
log('NEXT= ', next_page)
itemlist.append(
@@ -970,6 +981,7 @@ def controls(itemlist, item, AutoPlay=True, CheckLinks=True, down_load=True):
channel_node = autoplay_node.get(item.channel, {})
settings_node = channel_node.get('settings', {})
AP = get_setting('autoplay') or settings_node['active']
APS = get_setting('autoplay_server_list')
if CL and not AP:
if get_setting('checklinks', item.channel):
@@ -982,15 +994,27 @@ def controls(itemlist, item, AutoPlay=True, CheckLinks=True, down_load=True):
checklinks_number = get_setting('checklinks_number')
itemlist = servertools.check_list_links(itemlist, checklinks_number)
if AutoPlay == True and inspect.stack()[4][3] != 'start_download':
if AutoPlay == True and not 'downloads' in inspect.stack()[3][1] + inspect.stack()[4][1]:
autoplay.start(itemlist, item)
if item.contentChannel != 'videolibrary': videolibrary(itemlist, item, function_level=3)
if get_setting('downloadenabled') and down_load == True: download(itemlist, item, function_level=3)
return itemlist
VL = False
try:
if 'downloads' in inspect.stack()[3][1] + inspect.stack()[4][1] or \
inspect.stack()[4][3] == 'play_from_library' or \
inspect.stack()[5][3] == 'play_from_library' or \
'videolibrary' in inspect.stack()[3][1] or \
'videolibrary' in inspect.stack()[4][1]:
VL = True
except:
pass
if not AP or VL or not APS:
return itemlist
def filterLang(item, itemlist):
import channeltools
# import channeltools
list_language = channeltools.get_lang(item.channel)
if len(list_language) > 1:
from specials import filtertools