KoD 0.7
- nuovo metodo di override DNS - aggiunta opzione nascondi server, se usi l'autoplay - migliorie al codice e fix vari
This commit is contained in:
@@ -1,112 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# --------------------------------------------------------------------------------
|
||||
# Cloudflare decoder
|
||||
# --------------------------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import time
|
||||
import urllib
|
||||
|
||||
import urlparse
|
||||
|
||||
from platformcode import logger
|
||||
|
||||
|
||||
class Cloudflare:
    """Parse a Cloudflare challenge page and build the URL that answers it.

    Two sources are inspected when the page carries the challenge script:

    * ``js_data``: the JavaScript challenge form (``auth_url``, the hidden
      form ``params``, the raw page in ``data`` and ``wait`` in seconds).
    * ``header_data``: the ``refresh`` response header (``wait``,
      ``auth_url`` and the ``pass`` parameter).

    Either dict stays empty when its source is missing or cannot be parsed.
    """

    def __init__(self, response):
        """Inspect *response* for a Cloudflare challenge.

        :param response: mapping with the keys ``url``, ``data`` (page body)
            and ``headers`` (mapping of response headers).
        """
        self.timeout = 5
        parsed_url = urlparse.urlparse(response["url"])
        self.domain = parsed_url[1]
        self.protocol = parsed_url[0]
        self.js_data = {}
        self.header_data = {}

        # Nothing to do unless the page carries the challenge script, or when
        # the URL is already the challenge-answer endpoint.
        if "var s,t,o,p,b,r,e,a,k,i,n,g,f" not in response["data"] or "chk_jschl" in response["url"]:
            return

        body = response["data"]
        try:
            self.js_data["data"] = body
            self.js_data["auth_url"] = \
                re.compile('<form id="challenge-form" action="([^"]+)" method="get">').findall(body)[0]
            self.js_data["params"] = {}
            self.js_data["params"]["jschl_vc"] = \
                re.compile('<input type="hidden" name="jschl_vc" value="([^"]+)"/>').findall(body)[0]
            self.js_data["params"]["pass"] = \
                re.compile('<input type="hidden" name="pass" value="([^"]+)"/>').findall(body)[0]
            # The page states the delay in milliseconds; keep whole seconds.
            self.js_data["wait"] = int(re.compile(r"\}, ([\d]+)\);", re.MULTILINE).findall(body)[0]) // 1000
            self.js_data["params"]["s"] = \
                re.compile('<input type="hidden" name="s" value="([^"]+)"').findall(body)[0]
        except Exception:
            # Best effort: any missing field disables the JavaScript method.
            logger.debug("Metodo #1 (javascript): NO disponible")
            self.js_data = {}

        if "refresh" in response["headers"]:
            refresh = response["headers"]["refresh"]
            try:
                self.header_data["wait"] = int(refresh.split(";")[0])
                self.header_data["auth_url"] = refresh.split("=")[1].split("?")[0]
                self.header_data["params"] = {}
                self.header_data["params"]["pass"] = refresh.split("=")[2]
            except Exception:
                # Best effort: a malformed header disables the header method.
                logger.debug("Metodo #2 (headers): NO disponible")
                self.header_data = {}

    def solve_cf(self, body, domain):
        """Evaluate the challenge script embedded in *body*.

        :param body: HTML of the challenge page.
        :param domain: host name the challenge was served for.
        :return: the challenge answer as a float.
        :raises AttributeError: when *body* holds no challenge script
            (``re.search`` returns ``None``).
        """
        # Local imports: base64 was referenced by atob() below but never
        # imported anywhere, so any script calling atob raised NameError.
        import base64
        import js2py
        from jsc import jsunc

        js = re.search(
            r"setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n",
            body
        ).group(1)

        js = re.sub(r"a\.value = ((.+).toFixed\(10\))?", r"\1", js)
        js = re.sub(r'(e\s=\sfunction\(s\)\s{.*?};)', '', js, flags=re.DOTALL | re.MULTILINE)
        js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js).replace("t.length", str(len(domain)))
        js = js.replace('; 121', '')
        js = re.sub(r"[\n\\']", "", js)
        # Minimal browser-like environment the challenge script expects.
        jsEnv = """
        var t = "{domain}";
        var g = String.fromCharCode;
        o = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
        e = function(s) {{
            s += "==".slice(2 - (s.length & 3));
            var bm, r = "", r1, r2, i = 0;
            for (; i < s.length;) {{
                bm = o.indexOf(s.charAt(i++)) << 18 | o.indexOf(s.charAt(i++)) << 12 | (r1 = o.indexOf(s.charAt(i++))) << 6 | (r2 = o.indexOf(s.charAt(i++)));
                r += r1 === 64 ? g(bm >> 16 & 255) : r2 === 64 ? g(bm >> 16 & 255, bm >> 8 & 255) : g(bm >> 16 & 255, bm >> 8 & 255, bm & 255);
            }}
            return r;
        }};
        function italics (str) {{ return '<i>' + this + '</i>'; }};
        var document = {{
            getElementById: function () {{
                return {{'innerHTML': '{innerHTML}'}};
            }}
        }};
        {js}
        """
        innerHTML = re.search(r'<div(?: [^<>]*)? id="([^<>]*?)">([^<>]*?)<\/div>', body, re.MULTILINE | re.DOTALL)
        innerHTML = innerHTML.group(2).replace("'", r"\'") if innerHTML else ""
        js = jsunc(jsEnv.format(domain=domain, innerHTML=innerHTML, js=js))

        def atob(s):
            return base64.b64decode('{}'.format(s)).decode('utf-8')

        js2py.disable_pyimport()
        context = js2py.EvalJs({'atob': atob})
        result = context.eval(js)
        return float(result)

    @property
    def wait_time(self):
        """Seconds to wait: the JavaScript delay if set, else the header delay (0 if none)."""
        if self.js_data.get("wait", 0):
            return self.js_data["wait"]
        else:
            return self.header_data.get("wait", 0)

    @property
    def is_cloudflare(self):
        """True when either method found a positive wait time."""
        return self.header_data.get("wait", 0) > 0 or self.js_data.get("wait", 0) > 0

    def get_url(self):
        """Solve the JavaScript challenge and return the answer URL.

        Sleeps for the delay requested by the page before returning.
        Returns ``None`` when no JavaScript challenge was parsed; the data
        taken from the ``refresh`` header is not used here.
        """
        # Method #1 (javascript)
        if self.js_data.get("wait", 0):
            self.js_data["params"]["jschl_answer"] = self.solve_cf(self.js_data["data"], self.domain)
            response = "%s://%s%s?%s" % (
                self.protocol, self.domain, self.js_data["auth_url"], urllib.urlencode(self.js_data["params"]))
            time.sleep(self.js_data["wait"])
            return response
        return None
|
||||
+113
-296
@@ -17,7 +17,7 @@ from threading import Lock
|
||||
from core.jsontools import to_utf8
|
||||
from platformcode import config, logger
|
||||
from platformcode.logger import WebErrorException
|
||||
from core import scrapertoolsV2
|
||||
from core import scrapertools
|
||||
|
||||
# Get the addon version
|
||||
__version = config.get_addon_version()
|
||||
@@ -48,7 +48,7 @@ def get_user_agent():
|
||||
|
||||
def get_url_headers(url, forced=False):
|
||||
domain = urlparse.urlparse(url)[1]
|
||||
sub_dom = scrapertoolsV2.find_single_match(domain, r'\.(.*?\.\w+)')
|
||||
sub_dom = scrapertools.find_single_match(domain, r'\.(.*?\.\w+)')
|
||||
if sub_dom and not 'google' in url:
|
||||
domain = sub_dom
|
||||
domain_cookies = cj._cookies.get("." + domain, {}).get("/", {})
|
||||
@@ -144,34 +144,6 @@ def random_useragent():
|
||||
|
||||
return default_headers["User-Agent"]
|
||||
|
||||
def channel_proxy_list(url, forced_proxy=None):
    """Tell whether requests to *url* should go through a proxy.

    The 'proxy_channel_bloqued' setting holds a base64-encoded Python dict
    literal keyed by host name.

    :param url: URL about to be requested.
    :param forced_proxy: optional proxy selector. One of 'Total',
        'ProxyDirect', 'ProxyCF', 'ProxyWeb' forces a proxy for any listed
        host; any other value must appear in the host's entry.
    :return: True when a proxy applies, False otherwise (including when the
        setting cannot be decoded).
    """
    import ast
    import base64

    try:
        encoded = config.get_setting('proxy_channel_bloqued')
        proxy_channel_bloqued = ast.literal_eval(base64.b64decode(encoded).decode('utf-8'))
    except Exception:
        logger.debug('Proxytools not initialized correctly')
        return False

    if not url.endswith('/'):
        url += '/'
    # Extract the host once instead of re-running the same regex per lookup.
    host = scrapertoolsV2.find_single_match(url, r'(?:http.*\:)?\/\/(?:www\.)?([^\?|\/]+)(?:\?|\/)')
    if host not in proxy_channel_bloqued:
        return False

    if forced_proxy and forced_proxy not in ['Total', 'ProxyDirect', 'ProxyCF', 'ProxyWeb']:
        return forced_proxy in proxy_channel_bloqued[host]
    if forced_proxy:
        return True
    return 'OFF' not in proxy_channel_bloqued[host]
|
||||
|
||||
def show_infobox(info_dict):
|
||||
logger.info()
|
||||
@@ -232,137 +204,6 @@ def show_infobox(info_dict):
|
||||
logger.info('%s%s%s' % (box['r_dn_corner'], box['fill'] * width, box['l_dn_corner']))
|
||||
return
|
||||
|
||||
def check_proxy(url, **opt):
# Decide which proxy, if any, a request to `url` should use.
# Reads the 'proxy', 'proxy_web', 'proxy_addr_forced' and 'forced_proxy' options from `opt`,
# asks proxytools for addresses and fills `proxy_data` with the keys 'addr', 'CF_addr',
# 'web_name', 'log', 'dict' and (only on some paths) 'stat'.
# Returns the tuple (url, proxy_data, opt); url and opt['post'] may have been rewritten by
# proxytools.set_proxy_web.
# NOTE(review): indentation is not visible in this view, so branch nesting is not described here.
|
||||
proxy_data = dict()
|
||||
proxy_data['dict'] = {}
|
||||
proxy = opt.get('proxy', True)
|
||||
proxy_web = opt.get('proxy_web', False)
|
||||
proxy_addr_forced = opt.get('proxy_addr_forced', None)
|
||||
forced_proxy = opt.get('forced_proxy', None)
|
||||
|
||||
try:
|
||||
# Proxies are only looked up when one is forced or channel_proxy_list() accepts the URL.
if (proxy or proxy_web) and (forced_proxy or proxy_addr_forced or
|
||||
channel_proxy_list(url, forced_proxy=forced_proxy)):
|
||||
import proxytools
|
||||
proxy_data['addr'], proxy_data['CF_addr'], proxy_data['web_name'], \
|
||||
proxy_data['log'] = proxytools.get_proxy_addr(url, post=opt.get('post', None), forced_proxy=forced_proxy)
|
||||
|
||||
# With a forced address, the log text is replaced by the address taken from its string form.
if proxy_addr_forced and proxy_data['log']:
|
||||
proxy_data['log'] = scrapertoolsV2.find_single_match(str(proxy_addr_forced), r"{'http.*':\s*'(.*?)'}")
|
||||
|
||||
# Selection order: direct proxy, then CF proxy, then the forced address alone.
if proxy and proxy_data['addr']:
|
||||
if proxy_addr_forced: proxy_data['addr'] = proxy_addr_forced
|
||||
proxy_data['dict'] = proxy_data['addr']
|
||||
proxy_data['stat'] = ', Proxy Direct ' + proxy_data['log']
|
||||
elif proxy and proxy_data['CF_addr']:
|
||||
if proxy_addr_forced: proxy_data['CF_addr'] = proxy_addr_forced
|
||||
proxy_data['dict'] = proxy_data['CF_addr']
|
||||
proxy_data['stat'] = ', Proxy CF ' + proxy_data['log']
|
||||
elif proxy and proxy_addr_forced:
|
||||
proxy_data['addr'] = proxy_addr_forced
|
||||
proxy_data['dict'] = proxy_data['addr']
|
||||
proxy_data['stat'] = ', Proxy Direct ' + proxy_data['log']
|
||||
# No address available at all: query again with forced_proxy='Total' for a web proxy.
elif proxy and not proxy_data['addr'] and not proxy_data['CF_addr'] \
|
||||
and not proxy_addr_forced:
|
||||
proxy = False
|
||||
if not proxy_data['web_name']:
|
||||
proxy_data['addr'], proxy_data['CF_addr'], proxy_data['web_name'], \
|
||||
proxy_data['log'] = proxytools.get_proxy_addr(url, forced_proxy='Total')
|
||||
if proxy_data['web_name']:
|
||||
proxy_web = True
|
||||
else:
|
||||
proxy_web = False
|
||||
if proxy_data['addr']:
|
||||
proxy = True
|
||||
proxy_data['dict'] = proxy_data['addr']
|
||||
proxy_data['stat'] = ', Proxy Direct ' + proxy_data['log']
|
||||
|
||||
# Web proxy: set_proxy_web() returns a rewritten url and post plus extra headers.
if proxy_web and proxy_data['web_name']:
|
||||
if opt.get('post', None): proxy_data['log'] = '(POST) ' + proxy_data['log']
|
||||
url, opt['post'], headers_proxy, proxy_data['web_name'] = \
|
||||
proxytools.set_proxy_web(url, proxy_data['web_name'], post=opt.get('post', None))
|
||||
if proxy_data['web_name']:
|
||||
proxy_data['stat'] = ', Proxy Web ' + proxy_data['log']
|
||||
if headers_proxy:
|
||||
# NOTE(review): `request_headers` is not defined in this function; unless it is a
# module-level name this raises NameError, which the bare except below turns into a
# full reset of the proxy data - confirm.
request_headers.update(dict(headers_proxy))
|
||||
# The web proxy was wanted but set_proxy_web() returned no name: fall back to CF, then direct.
if proxy_web and not proxy_data['web_name']:
|
||||
proxy_web = False
|
||||
proxy_data['addr'], proxy_data['CF_addr'], proxy_data['web_name'], \
|
||||
proxy_data['log'] = proxytools.get_proxy_addr(url, forced_proxy='Total')
|
||||
if proxy_data['CF_addr']:
|
||||
proxy = True
|
||||
proxy_data['dict'] = proxy_data['CF_addr']
|
||||
proxy_data['stat'] = ', Proxy CF ' + proxy_data['log']
|
||||
elif proxy_data['addr']:
|
||||
proxy = True
|
||||
proxy_data['dict'] = proxy_data['addr']
|
||||
proxy_data['stat'] = ', Proxy Direct ' + proxy_data['log']
|
||||
|
||||
# Any failure is logged, all proxy data is cleared and the URL saved in opt['url_save'] is restored.
except:
|
||||
import traceback
|
||||
logger.error(traceback.format_exc())
|
||||
opt['proxy'] = ''
|
||||
opt['proxy_web'] = ''
|
||||
proxy_data['stat'] = ''
|
||||
proxy_data['addr'] = ''
|
||||
proxy_data['CF_addr'] = ''
|
||||
proxy_data['dict'] = {}
|
||||
proxy_data['web_name'] = ''
|
||||
proxy_data['log'] = ''
|
||||
url = opt['url_save']
|
||||
# Prefix the https proxy address with its scheme; skipped silently when 'addr' is missing or has no 'https' entry.
try:
|
||||
proxy_data['addr']['https'] = str('https://'+ proxy_data['addr']['https'])
|
||||
except:
|
||||
pass
|
||||
return url, proxy_data, opt
|
||||
|
||||
|
||||
def proxy_post_processing(url, proxy_data, response, opt):
# Post-process a response obtained through a proxy and prepare a possible retry.
# opt['out_break'] is set to False on entry and to True when the retry condition below is not
# met or when any exception occurs.
# Returns the tuple (response["data"], response['sucess'], url, opt).
# 'sucess' is the key spelling this code uses for the response's success flag.
# NOTE(review): indentation is not visible in this view, so branch nesting is not described here.
|
||||
opt['out_break'] = False
|
||||
try:
|
||||
# Web-proxy responses are passed through restore_after_proxy_web(); an 'ERROR' body marks failure.
if ', Proxy Web' in proxy_data.get('stat', ''):
|
||||
import proxytools
|
||||
response["data"] = proxytools.restore_after_proxy_web(response["data"],
|
||||
proxy_data['web_name'], opt['url_save'])
|
||||
if response["data"] == 'ERROR':
|
||||
response['sucess'] = False
|
||||
# On a 302 the saved URL and POST data are restored and a direct proxy is forced.
if response["code"] == 302:
|
||||
proxy_data['stat'] = ', Proxy Direct'
|
||||
opt['forced_proxy'] = 'ProxyDirect'
|
||||
url = opt['url_save']
|
||||
opt['post'] = opt['post_save']
|
||||
response['sucess'] = False
|
||||
|
||||
# Retry preparation: only for a failed proxied request that is still within the retry limits.
if proxy_data.get('stat', '') and response['sucess'] == False and \
|
||||
opt.get('proxy_retries_counter', 0) <= opt.get('proxy_retries', 1) and opt.get('count_retries_tot', 5) > 1:
|
||||
import proxytools
|
||||
# The proxy list of the failing kind is refreshed, passing the failing entry as error_skip.
if ', Proxy Direct' in proxy_data.get('stat', ''):
|
||||
proxytools.get_proxy_list_method(proxy_init='ProxyDirect',
|
||||
error_skip=proxy_data['addr'], url_test=url)
|
||||
elif ', Proxy CF' in proxy_data.get('stat', ''):
|
||||
proxytools.get_proxy_list_method(proxy_init='ProxyCF',
|
||||
error_skip=proxy_data['CF_addr'])
|
||||
url = opt['url_save']
|
||||
elif ', Proxy Web' in proxy_data.get('stat', ''):
|
||||
if channel_proxy_list(opt['url_save'], forced_proxy=proxy_data['web_name']):
|
||||
opt['forced_proxy'] = 'ProxyCF'
|
||||
url =opt['url_save']
|
||||
opt['post'] = opt['post_save']
|
||||
else:
|
||||
proxytools.get_proxy_list_method(proxy_init='ProxyWeb',
|
||||
error_skip=proxy_data['web_name'])
|
||||
url =opt['url_save']
|
||||
opt['post'] = opt['post_save']
|
||||
|
||||
else:
|
||||
opt['out_break'] = True
|
||||
# Any failure is logged and reported through opt['out_break'].
except:
|
||||
import traceback
|
||||
logger.error(traceback.format_exc())
|
||||
opt['out_break'] = True
|
||||
|
||||
return response["data"], response['sucess'], url, opt
|
||||
|
||||
|
||||
|
||||
def downloadpage(url, **opt):
|
||||
@@ -410,29 +251,21 @@ def downloadpage(url, **opt):
|
||||
|
||||
"""
|
||||
load_cookies()
|
||||
|
||||
# if scrapertoolsV2.get_domain_from_url(url) in ['www.seriehd.moda', 'wstream.video', 'www.guardaserie.media', 'akvideo.stream','www.piratestreaming.top']: # cloudflare urls
|
||||
# if opt.get('session', False):
|
||||
# session = opt['session'] # same session to speed up search
|
||||
# else:
|
||||
# from lib import cloudscraper
|
||||
# session = cloudscraper.create_scraper()
|
||||
# else:
|
||||
# from lib import requests
|
||||
# session = requests.session()
|
||||
|
||||
if opt.get('session', False):
|
||||
session = opt['session'] # same session to speed up search
|
||||
logger.info('same session')
|
||||
elif opt.get('use_requests', False):
|
||||
from lib import requests
|
||||
session = requests.session()
|
||||
else:
|
||||
if urlparse.urlparse(url).netloc in ['www.guardaserie.media', 'casacinema.space']:
|
||||
from lib import cloudscraper
|
||||
session = cloudscraper.create_scraper()
|
||||
elif opt.get('session', False):
|
||||
session = opt['session'] # same session to speed up search
|
||||
logger.info('same session')
|
||||
elif config.get_setting('resolver_dns') and not opt.get('use_requests', False):
|
||||
from specials import resolverdns
|
||||
session = resolverdns.session()
|
||||
else:
|
||||
from lib import requests
|
||||
session = requests.session()
|
||||
|
||||
# Headers by default, if nothing is specified
|
||||
req_headers = default_headers.copy()
|
||||
verify = opt.get('verify', True)
|
||||
|
||||
# Headers passed as parameters
|
||||
if opt.get('headers', None) is not None:
|
||||
@@ -445,148 +278,132 @@ def downloadpage(url, **opt):
|
||||
req_headers['User-Agent'] = random_useragent()
|
||||
url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")
|
||||
|
||||
opt['proxy_retries_counter'] = 0
|
||||
opt['url_save'] = url
|
||||
opt['post_save'] = opt.get('post', None)
|
||||
|
||||
while opt['proxy_retries_counter'] <= opt.get('proxy_retries', 1):
|
||||
response = {}
|
||||
info_dict = []
|
||||
payload = dict()
|
||||
files = {}
|
||||
file_name = ''
|
||||
opt['proxy_retries_counter'] += 1
|
||||
response = {}
|
||||
info_dict = []
|
||||
payload = dict()
|
||||
files = {}
|
||||
file_name = ''
|
||||
|
||||
session.verify = opt.get('verify', True)
|
||||
session.verify = opt.get('verify', verify)
|
||||
|
||||
if opt.get('cookies', True):
|
||||
session.cookies = cj
|
||||
session.headers.update(req_headers)
|
||||
if opt.get('cookies', True):
|
||||
session.cookies = cj
|
||||
session.headers.update(req_headers)
|
||||
|
||||
# Prepare the url in case you need a proxy, or if proxies are sent from the channel
|
||||
# url, proxy_data, opt = check_proxy(url, **opt)
|
||||
# if opt.get('proxies', None) is not None:
|
||||
# session.proxies = opt['proxies']
|
||||
# elif proxy_data.get('dict', {}):
|
||||
# session.proxies = proxy_data['dict']
|
||||
proxy_data = {'dict': {}}
|
||||
proxy_data = {'dict': {}}
|
||||
|
||||
inicio = time.time()
|
||||
inicio = time.time()
|
||||
|
||||
if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
|
||||
opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
|
||||
if opt['timeout'] == 0: opt['timeout'] = None
|
||||
if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
|
||||
opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
|
||||
if opt['timeout'] == 0: opt['timeout'] = None
|
||||
|
||||
if len(url) > 0:
|
||||
try:
|
||||
if opt.get('post', None) is not None or opt.get('file', None) is not None:
|
||||
if opt.get('post', None) is not None:
|
||||
# Convert string post in dict
|
||||
try:
|
||||
json.loads(opt['post'])
|
||||
payload = opt['post']
|
||||
except:
|
||||
if not isinstance(opt['post'], dict):
|
||||
post = urlparse.parse_qs(opt['post'], keep_blank_values=1)
|
||||
payload = dict()
|
||||
if len(url) > 0:
|
||||
try:
|
||||
if opt.get('post', None) is not None or opt.get('file', None) is not None:
|
||||
if opt.get('post', None) is not None:
|
||||
# Convert string post in dict
|
||||
try:
|
||||
json.loads(opt['post'])
|
||||
payload = opt['post']
|
||||
except:
|
||||
if not isinstance(opt['post'], dict):
|
||||
post = urlparse.parse_qs(opt['post'], keep_blank_values=1)
|
||||
payload = dict()
|
||||
|
||||
for key, value in post.items():
|
||||
try:
|
||||
payload[key] = value[0]
|
||||
except:
|
||||
payload[key] = ''
|
||||
else:
|
||||
payload = opt['post']
|
||||
|
||||
# Verify 'file' and 'file_name' options to upload a buffer or file
|
||||
if opt.get('file', None) is not None:
|
||||
if os.path.isfile(opt['file']):
|
||||
if opt.get('file_name', None) is None:
|
||||
path_file, opt['file_name'] = os.path.split(opt['file'])
|
||||
files = {'file': (opt['file_name'], open(opt['file'], 'rb'))}
|
||||
file_name = opt['file']
|
||||
for key, value in post.items():
|
||||
try:
|
||||
payload[key] = value[0]
|
||||
except:
|
||||
payload[key] = ''
|
||||
else:
|
||||
files = {'file': (opt.get('file_name', 'Default'), opt['file'])}
|
||||
file_name = opt.get('file_name', 'Default') + ', Buffer de memoria'
|
||||
payload = opt['post']
|
||||
|
||||
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
|
||||
if opt.get('only_headers', False):
|
||||
# Makes the request with HEAD method
|
||||
req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
|
||||
timeout=opt['timeout'])
|
||||
# Verify 'file' and 'file_name' options to upload a buffer or file
|
||||
if opt.get('file', None) is not None:
|
||||
if os.path.isfile(opt['file']):
|
||||
if opt.get('file_name', None) is None:
|
||||
path_file, opt['file_name'] = os.path.split(opt['file'])
|
||||
files = {'file': (opt['file_name'], open(opt['file'], 'rb'))}
|
||||
file_name = opt['file']
|
||||
else:
|
||||
# Makes the request with POST method
|
||||
req = session.post(url, data=payload, allow_redirects=opt.get('follow_redirects', True),
|
||||
files=files, timeout=opt['timeout'])
|
||||
files = {'file': (opt.get('file_name', 'Default'), opt['file'])}
|
||||
file_name = opt.get('file_name', 'Default') + ', Buffer de memoria'
|
||||
|
||||
elif opt.get('only_headers', False):
|
||||
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
|
||||
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
|
||||
if opt.get('only_headers', False):
|
||||
# Makes the request with HEAD method
|
||||
req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
|
||||
timeout=opt['timeout'])
|
||||
else:
|
||||
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
|
||||
# Makes the request with GET method
|
||||
req = session.get(url, allow_redirects=opt.get('follow_redirects', True),
|
||||
timeout=opt['timeout'])
|
||||
# Makes the request with POST method
|
||||
req = session.post(url, data=payload, allow_redirects=opt.get('follow_redirects', True),
|
||||
files=files, timeout=opt['timeout'])
|
||||
|
||||
except Exception as e:
|
||||
from lib import requests
|
||||
if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
|
||||
req = requests.Response()
|
||||
response['data'] = ''
|
||||
response['sucess'] = False
|
||||
info_dict.append(('Success', 'False'))
|
||||
response['code'] = str(e)
|
||||
info_dict.append(('Response code', str(e)))
|
||||
info_dict.append(('Finalizado en', time.time() - inicio))
|
||||
if not opt.get('alfa_s', False):
|
||||
show_infobox(info_dict)
|
||||
return type('HTTPResponse', (), response)
|
||||
else:
|
||||
req = requests.Response()
|
||||
req.status_code = str(e)
|
||||
elif opt.get('only_headers', False):
|
||||
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
|
||||
# Makes the request with HEAD method
|
||||
req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
|
||||
timeout=opt['timeout'])
|
||||
else:
|
||||
info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
|
||||
# Makes the request with GET method
|
||||
req = session.get(url, allow_redirects=opt.get('follow_redirects', True),
|
||||
timeout=opt['timeout'])
|
||||
except Exception as e:
|
||||
from lib import requests
|
||||
if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
|
||||
response['data'] = ''
|
||||
response['sucess'] = False
|
||||
info_dict.append(('Success', 'False'))
|
||||
response['code'] = str(e)
|
||||
info_dict.append(('Response code', str(e)))
|
||||
info_dict.append(('Finalizado en', time.time() - inicio))
|
||||
if not opt.get('alfa_s', False):
|
||||
show_infobox(info_dict)
|
||||
return type('HTTPResponse', (), response)
|
||||
else:
|
||||
req = requests.Response()
|
||||
req.status_code = str(e)
|
||||
|
||||
else:
|
||||
response['data'] = ''
|
||||
response['sucess'] = False
|
||||
response['code'] = ''
|
||||
return type('HTTPResponse', (), response)
|
||||
else:
|
||||
response['data'] = ''
|
||||
response['sucess'] = False
|
||||
response['code'] = ''
|
||||
return type('HTTPResponse', (), response)
|
||||
|
||||
response_code = req.status_code
|
||||
response_code = req.status_code
|
||||
|
||||
response['data'] = req.content
|
||||
response['url'] = req.url
|
||||
if not response['data']:
|
||||
response['data'] = ''
|
||||
try:
|
||||
response['json'] = to_utf8(req.json())
|
||||
except:
|
||||
response['json'] = dict()
|
||||
response['code'] = response_code
|
||||
response['headers'] = req.headers
|
||||
response['cookies'] = req.cookies
|
||||
response['data'] = req.content
|
||||
response['url'] = req.url
|
||||
if not response['data']:
|
||||
response['data'] = ''
|
||||
try:
|
||||
response['json'] = to_utf8(req.json())
|
||||
except:
|
||||
response['json'] = dict()
|
||||
response['code'] = response_code
|
||||
response['headers'] = req.headers
|
||||
response['cookies'] = req.cookies
|
||||
|
||||
info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)
|
||||
info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)
|
||||
|
||||
if opt.get('cookies', True):
|
||||
save_cookies(alfa_s=opt.get('alfa_s', False))
|
||||
if opt.get('cookies', True):
|
||||
save_cookies(alfa_s=opt.get('alfa_s', False))
|
||||
|
||||
# is_channel = inspect.getmodule(inspect.currentframe().f_back)
|
||||
# is_channel = scrapertoolsV2.find_single_match(str(is_channel), "<module '(channels).*?'")
|
||||
# if is_channel and isinstance(response_code, int):
|
||||
# if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
|
||||
# if response_code > 399:
|
||||
# show_infobox(info_dict)
|
||||
# raise WebErrorException(urlparse.urlparse(url)[1])
|
||||
# is_channel = inspect.getmodule(inspect.currentframe().f_back)
|
||||
# is_channel = scrapertools.find_single_match(str(is_channel), "<module '(channels).*?'")
|
||||
# if is_channel and isinstance(response_code, int):
|
||||
# if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
|
||||
# if response_code > 399:
|
||||
# show_infobox(info_dict)
|
||||
# raise WebErrorException(urlparse.urlparse(url)[1])
|
||||
|
||||
if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
|
||||
show_infobox(info_dict)
|
||||
|
||||
# If there is a proxy error, refresh the list and retry the number indicated in proxy_retries
|
||||
# response['data'], response['sucess'], url, opt = proxy_post_processing(url, proxy_data, response, opt)
|
||||
# if opt.get('out_break', False):
|
||||
# break
|
||||
if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
|
||||
show_infobox(info_dict)
|
||||
|
||||
return type('HTTPResponse', (), response)
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
+76
-82
@@ -1,27 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# --------------------------------------------------------------------------------
|
||||
# Scraper tools for reading and processing web elements
|
||||
# Scraper tools v2 for reading and processing web elements
|
||||
# --------------------------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from core import httptools
|
||||
import urlparse
|
||||
|
||||
from core.entities import html5
|
||||
from platformcode import logger
|
||||
|
||||
|
||||
def get_header_from_response(url, header_to_get="", post=None, headers=None):
    """Request only the headers of *url* and return the one named *header_to_get*.

    The header name is lower-cased before the lookup; the result is whatever
    ``response.headers.get`` yields for that name.
    """
    wanted = header_to_get.lower()
    reply = httptools.downloadpage(url, only_headers=True, headers=headers, post=post)
    return reply.headers.get(wanted)
|
||||
|
||||
|
||||
def read_body_and_headers(url, post=None, headers=None, follow_redirects=False, timeout=None):
    """Download *url* and return the pair ``(body, headers)`` of the response."""
    request_options = {
        "post": post,
        "headers": headers,
        "follow_redirects": follow_redirects,
        "timeout": timeout,
    }
    page = httptools.downloadpage(url, **request_options)
    return page.data, page.headers
|
||||
|
||||
|
||||
def printMatches(matches):
|
||||
i = 0
|
||||
for match in matches:
|
||||
@@ -42,8 +32,37 @@ def find_multiple_matches(text, pattern):
|
||||
return re.findall(pattern, text, re.DOTALL)
|
||||
|
||||
|
||||
def entityunescape(cadena):
    """Return *cadena* processed by ``unescape``; kept as an alias of that function."""
    unescaped = unescape(cadena)
    return unescaped
|
||||
def find_multiple_matches_groups(text, pattern):
    """Return one dict of named groups for every match of *pattern* in *text*.

    No regex flags are applied here.
    """
    named_groups = []
    for hit in re.finditer(pattern, text):
        named_groups.append(hit.groupdict())
    return named_groups
|
||||
|
||||
|
||||
# Convierte los codigos html "&ntilde;" y lo reemplaza por "ñ" caracter unicode utf-8
|
||||
def decodeHtmlentities(data):
    """Replace HTML entities in *data* with their UTF-8 encoded characters.

    Named entities are looked up in ``html5``. Numeric references are given
    in decimal (``&#241;``) or hexadecimal (``&#xF1;``). Anything that
    cannot be resolved is left untouched.

    :param data: text containing HTML entities.
    :return: *data* with the recognised entities replaced.
    """
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8})(;?)")

    def substitute_entity(match):
        is_numeric = match.group(1) == "#"
        ent = match.group(2) + match.group(3)
        res = ""
        # Exception for when '&' is used as an argument separator in URLs
        # contained in the data: trailing characters are peeled off until a
        # known entity name remains, and are re-appended after it.
        while ent and not is_numeric and ent not in html5 and not ent.endswith(";"):
            res = ent[-1] + res
            ent = ent[:-1]

        if is_numeric:
            digits = ent.replace(";", "")
            try:
                if digits[:1] in ("x", "X"):
                    code = int(digits[1:], 16)
                else:
                    code = int(digits)
                return unichr(code).encode('utf-8')
            except (ValueError, OverflowError):
                # Not a valid character reference (e.g. "&#abc"): a single
                # bad reference must not abort the whole substitution.
                return match.group()

        cp = html5.get(ent)
        if cp:
            return cp.decode("unicode-escape").encode('utf-8') + res
        return match.group()

    return entity_re.subn(substitute_entity, data)[0]
|
||||
|
||||
|
||||
def unescape(text):
|
||||
@@ -84,47 +103,6 @@ def unescape(text):
|
||||
# Convierte los codigos html "&ntilde;" y lo reemplaza por "ñ" caracter unicode utf-8
|
||||
|
||||
|
||||
def decodeHtmlentities(string):
    """Replace ``;``-terminated HTML entities in *string* with UTF-8 characters.

    The text first goes through ``entitiesfix``. Named entities are resolved
    with ``htmlentitydefs.name2codepoint``; numeric references are given in
    decimal or hexadecimal. Unresolvable entities are left untouched.

    :param string: text containing HTML entities.
    :return: the text with the recognised entities replaced.
    """
    # Imported once per call rather than once per matched entity.
    from htmlentitydefs import name2codepoint as n2cp

    string = entitiesfix(string)
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def substitute_entity(match):
        ent = match.group(2)
        if match.group(1) == "#":
            try:
                if ent[:1] in ("x", "X"):
                    code = int(ent[1:], 16)
                else:
                    code = int(ent)
                return unichr(code).encode('utf-8')
            except (ValueError, OverflowError):
                # Not a valid character reference (e.g. "&#abc;"): keep the
                # original text instead of aborting the whole substitution.
                return match.group()

        cp = n2cp.get(ent)
        if cp:
            return unichr(cp).encode('utf-8')
        return match.group()

    return entity_re.subn(substitute_entity, string)[0]
|
||||
|
||||
|
||||
def entitiesfix(string):
    """Terminate a fixed set of HTML entities that lack their final ``;``.

    Entities always start with ``&`` and end with ``;``. Each known entity
    gets a ``;`` appended, and the doubled ``;;`` that this produces on
    already terminated entities is collapsed afterwards.

    :param string: text possibly containing unterminated entities.
    :return: the text with those entities terminated.
    """
    unterminated = (
        "&aacute", "&eacute", "&iacute", "&oacute", "&uacute",
        "&Aacute", "&Eacute", "&Iacute", "&Oacute", "&Uacute",
        "&uuml", "&Uuml", "&ntilde", "&#191", "&#161",
    )
    for entity in unterminated:
        string = string.replace(entity, entity + ";")
    # Note: this also collapses any ';;' that was already in the text.
    string = string.replace(";;", ";")
    return string
|
||||
|
||||
|
||||
def htmlclean(cadena):
|
||||
cadena = re.compile("<!--.*?-->", re.DOTALL).sub("", cadena)
|
||||
|
||||
@@ -226,7 +204,7 @@ def htmlclean(cadena):
|
||||
cadena = re.compile("<link[^>]*>", re.DOTALL).sub("", cadena)
|
||||
|
||||
cadena = cadena.replace("\t", "")
|
||||
cadena = entityunescape(cadena)
|
||||
# cadena = entityunescape(cadena)
|
||||
return cadena
|
||||
|
||||
|
||||
@@ -314,8 +292,8 @@ def remove_show_from_title(title, show):
|
||||
return title
|
||||
|
||||
|
||||
# scrapertools.get_filename_from_url(media_url)[-4:]
|
||||
def get_filename_from_url(url):
|
||||
import urlparse
|
||||
parsed_url = urlparse.urlparse(url)
|
||||
try:
|
||||
filename = parsed_url.path
|
||||
@@ -332,19 +310,18 @@ def get_filename_from_url(url):
|
||||
return filename
|
||||
|
||||
|
||||
# def get_domain_from_url(url):
|
||||
# import urlparse
|
||||
# parsed_url = urlparse.urlparse(url)
|
||||
# try:
|
||||
# filename = parsed_url.netloc
|
||||
# except:
|
||||
# # Si falla es porque la implementación de parsed_url no reconoce los atributos como "path"
|
||||
# if len(parsed_url) >= 4:
|
||||
# filename = parsed_url[1]
|
||||
# else:
|
||||
# filename = ""
|
||||
#
|
||||
# return filename
|
||||
def get_domain_from_url(url):
    """Return the network location (host, with port if present) of *url*.

    :param url: URL to inspect.
    :return: the ``netloc`` part of the parsed URL, or "" when it has none.
    """
    parsed_url = urlparse.urlparse(url)
    try:
        domain = parsed_url.netloc
    except AttributeError:
        # This fails when the parsed_url implementation does not expose
        # named attributes; fall back to positional access.
        if len(parsed_url) >= 4:
            domain = parsed_url[1]
        else:
            domain = ""

    return domain
|
||||
|
||||
|
||||
def get_season_and_episode(title):
|
||||
@@ -365,22 +342,15 @@ def get_season_and_episode(title):
|
||||
@return: Numero de temporada y episodio en formato "1x01" o cadena vacia si no se han encontrado
|
||||
"""
|
||||
filename = ""
|
||||
# 4l3x87 - fix for series example 9-1-1
|
||||
# original_title = title
|
||||
# title = title.replace('9-1-1','')
|
||||
|
||||
patrons = ["(\d+)\s*[x-]\s*(\d+)", "(\d+)\s*×\s*(\d+)", "(?:s|t)(\d+)e(\d+)",
|
||||
"(?:season|temp|stagione\w*)\s*(\d+)\s*(?:capitulo|epi|episode|episodio\w*)\s*(\d+)"]
|
||||
patrons = ["(\d+)x(\d+)", "(?:s|t)(\d+)e(\d+)",
|
||||
"(?:season|temp\w*)\s*(\d+)\s*(?:capitulo|epi\w*)\s*(\d+)"]
|
||||
|
||||
for patron in patrons:
|
||||
try:
|
||||
matches = re.compile(patron, re.I).search(title)
|
||||
|
||||
if matches:
|
||||
if len(matches.group(1)) == 1:
|
||||
filename = matches.group(1) + "x" + matches.group(2).zfill(2)
|
||||
else:
|
||||
filename = matches.group(1).lstrip('0') + "x" + matches.group(2).zfill(2)
|
||||
filename = matches.group(1) + "x" + matches.group(2).zfill(2)
|
||||
break
|
||||
except:
|
||||
pass
|
||||
@@ -388,3 +358,27 @@ def get_season_and_episode(title):
|
||||
logger.info("'" + title + "' -> '" + filename + "'")
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
def get_sha1(cadena):
    """Return the hexadecimal SHA-1 digest of *cadena*.

    :param cadena: byte string, or text (hashed as its UTF-8 encoding).
    :return: 40-character lowercase hexadecimal digest.
    """
    # Hash functions work on bytes; encode text instead of letting the
    # resulting error fall through to the legacy-module fallback.
    if not isinstance(cadena, bytes):
        cadena = cadena.encode("utf-8")

    try:
        import hashlib
    except ImportError:
        # Interpreters without hashlib: use the legacy 'sha' module.
        import binascii
        import sha
        return binascii.hexlify(sha.new(cadena).digest())

    return hashlib.sha1(cadena).hexdigest()
|
||||
|
||||
|
||||
def get_md5(cadena):
    """Return the hexadecimal MD5 digest of *cadena*.

    :param cadena: byte string, or text (hashed as its UTF-8 encoding).
    :return: 32-character lowercase hexadecimal digest.
    """
    # Hash functions work on bytes; encode text instead of letting the
    # resulting error fall through to the legacy-module fallback.
    if not isinstance(cadena, bytes):
        cadena = cadena.encode("utf-8")

    try:
        import hashlib
    except ImportError:
        # Interpreters without hashlib: use the legacy 'md5' module.
        import binascii
        import md5
        return binascii.hexlify(md5.new(cadena).digest())

    return hashlib.md5(cadena).hexdigest()
|
||||
|
||||
@@ -1,346 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# --------------------------------------------------------------------------------
|
||||
# Scraper tools v2 for reading and processing web elements
|
||||
# --------------------------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
import urlparse
|
||||
|
||||
from core.entities import html5
|
||||
from platformcode import logger
|
||||
|
||||
|
||||
def printMatches(matches):
    """
    Write every element of a list of matches to the log.

    Each element is logged through ``logger.info`` as "<position> <value>",
    with positions starting at 0.

    @param matches: iterable with the values to log
    """
    for position, found in enumerate(matches):
        logger.info("%d %s" % (position, found))
|
||||
|
||||
|
||||
def find_single_match(data, patron, index=0):
    """
    Return a single match of a regular expression inside a text.

    The pattern is applied with ``re.DOTALL``, so "." also matches newlines.

    @param data: text to search in
    @param patron: regular expression to look for
    @param index: position, inside the list returned by ``re.findall``, of the
                  match to return (the first one by default)
    @return: the requested match (a string, or a tuple when the pattern has
             several groups), or "" when it does not exist or the search fails
    """
    try:
        return re.findall(patron, data, flags=re.DOTALL)[index]
    except Exception:
        # Best effort: a missing match, an invalid pattern or unsearchable data
        # all yield "". KeyboardInterrupt/SystemExit are allowed to propagate.
        return ""
|
||||
|
||||
|
||||
# Parses a string and extracts all the matches of a regular expression
|
||||
def find_multiple_matches(text, pattern):
    """
    Return every non-overlapping match of a regular expression inside a text.

    The pattern is compiled with ``re.DOTALL``, so "." also matches newlines.

    @param text: text to search in
    @param pattern: regular expression to look for
    @rtype: list
    @return: list with the matches, as returned by ``findall``
    """
    matcher = re.compile(pattern, re.DOTALL)
    return matcher.findall(text)
|
||||
|
||||
|
||||
def find_multiple_matches_groups(text, pattern):
    """
    Return the named groups of every match of a regular expression.

    The pattern is applied without extra flags.

    @param text: text to search in
    @param pattern: regular expression, normally containing named groups
    @rtype: list
    @return: one dictionary (``groupdict()``) for each match found, in order
    """
    named_groups = []
    for found in re.finditer(pattern, text):
        named_groups.append(found.groupdict())
    return named_groups
|
||||
|
||||
|
||||
# Converts HTML entity codes such as "&ntilde;" into the matching UTF-8 encoded character ("ñ")
|
||||
def decodeHtmlentities(data):
    """
    Replace the HTML character references found in a text by UTF-8 encoded characters.

    Named entities are looked up in the ``html5`` table; numeric references are
    accepted both in decimal ("&#241;") and hexadecimal ("&#xF1;") form.
    References that cannot be resolved are left exactly as they were found.

    This code relies on Python 2 primitives (``unichr`` and ``str.decode``).

    @param data: text that may contain HTML character references
    @return: the text with the recognised references replaced
    """
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8})(;?)")

    def substitute_entity(match):
        ent = match.group(2) + match.group(3)

        if match.group(1) == "#":
            digits = ent.replace(";", "")
            try:
                if digits[:1] in ("x", "X"):
                    codepoint = int(digits[1:], 16)
                else:
                    codepoint = int(digits)
                return unichr(codepoint).encode('utf-8')
            except (ValueError, OverflowError):
                # Not a valid numeric reference: keep the original text
                # instead of aborting the whole substitution
                return match.group()

        # Exception for when '&' is used as an argument separator in the urls
        # contained in the data: characters are peeled off the end of the
        # candidate until a known entity name (or nothing) is left
        res = ""
        while ent and ent not in html5 and not ent.endswith(";"):
            res = ent[-1] + res
            ent = ent[:-1]

        cp = html5.get(ent)
        if cp:
            return cp.decode("unicode-escape").encode('utf-8') + res
        return match.group()

    return entity_re.subn(substitute_entity, data)[0]
|
||||
|
||||
|
||||
def htmlclean(cadena):
    """
    Remove a fixed set of HTML tags, the HTML comments and the tab characters from a text.

    The steps are applied in a fixed order. Line break tags are replaced by a
    space, "<script>...</script>" is removed together with its content and the
    rest of the listed tags are removed keeping the text they enclose.
    Opening tags are matched by prefix: for example "<i[^>]*>" also removes
    any other tag whose name starts with "i", and "<b[^>]*>" any tag whose
    name starts with "b".

    @param cadena: HTML text to clean
    @return: the text without the listed tags
    """

    def remove_pattern(text, pattern, replacement=""):
        return re.compile(pattern, re.DOTALL).sub(replacement, text)

    def remove_literals(text, literals, replacement=""):
        for literal in literals:
            text = text.replace(literal, replacement)
        return text

    def remove_pairs(text, pairs):
        # For each entry: first the opening tag (regex), then its closing tags
        for opening, closings in pairs:
            text = remove_pattern(text, opening)
            text = remove_literals(text, closings)
        return text

    cadena = remove_pattern(cadena, "<!--.*?-->")

    cadena = remove_literals(cadena, (
        "<center>", "</center>",
        "<cite>", "</cite>",
        "<em>", "</em>",
        "<u>", "</u>",
        "<li>", "</li>",
        # "<turl>" is kept for compatibility; "<tbody>" is the opening tag
        # that pairs with the "</tbody>" removed right after it
        "<turl>", "<tbody>", "</tbody>",
        "<tr>", "</tr>",
        "<![CDATA[",
        "<wbr>",
    ))

    # Line breaks become a blank space
    cadena = remove_literals(cadena, ("<Br />", "<BR />", "<Br>"), " ")
    cadena = remove_pattern(cadena, "<br[^>]*>", " ")

    # Scripts are removed together with their content
    cadena = remove_pattern(cadena, "<script.*?</script>")

    cadena = remove_pairs(cadena, (
        ("<option[^>]*>", ("</option>",)),
        ("<button[^>]*>", ("</button>",)),
        ("<i[^>]*>", ("</iframe>", "</i>")),
        ("<table[^>]*>", ("</table>",)),
        ("<td[^>]*>", ("</td>",)),
        ("<div[^>]*>", ("</div>",)),
        ("<dd[^>]*>", ("</dd>",)),
        ("<b[^>]*>", ("</b>",)),
        ("<font[^>]*>", ("</font>",)),
        ("<strong[^>]*>", ("</strong>",)),
        ("<small[^>]*>", ("</small>",)),
        ("<span[^>]*>", ("</span>",)),
        ("<a[^>]*>", ("</a>",)),
        ("<p[^>]*>", ("</p>",)),
        ("<ul[^>]*>", ("</ul>",)),
        ("<h1[^>]*>", ("</h1>",)),
        ("<h2[^>]*>", ("</h2>",)),
        ("<h3[^>]*>", ("</h3>",)),
        ("<h4[^>]*>", ("</h4>",)),
    ))

    cadena = remove_pattern(cadena, "<!--[^-]+-->")
    cadena = remove_pattern(cadena, "<img[^>]*>")

    cadena = remove_pairs(cadena, (
        ("<object[^>]*>", ("</object>",)),
        ("<param[^>]*>", ("</param>",)),
        ("<embed[^>]*>", ("</embed>",)),
        ("<title[^>]*>", ("</title>",)),
    ))

    cadena = remove_pattern(cadena, "<link[^>]*>")

    cadena = cadena.replace("\t", "")
    # cadena = entityunescape(cadena)
    return cadena
|
||||
|
||||
|
||||
def slugify(title):
    """
    Build a slug (lowercase letters, digits and hyphens) from a title.

    Accented vowels, "ñ" and "ç" are replaced by their plain letters, "/" by a
    hyphen and the HTML escaped ampersand "&amp;" by "&" (which, like every
    other character outside a-z, 0-9, "-" and blank, is then discarded).
    Blanks are turned into hyphens, runs of hyphens are collapsed and a single
    leading hyphen is removed. When nothing is left, "-" followed by the
    current timestamp is returned.

    @param title: text to convert
    @rtype: str
    @return: the slug generated from the title
    """
    # Replaces accents and "ñ"
    replacements = (
        ("Á", "a"), ("É", "e"), ("Í", "i"), ("Ó", "o"), ("Ú", "u"),
        ("á", "a"), ("é", "e"), ("í", "i"), ("ó", "o"), ("ú", "u"),
        ("À", "a"), ("È", "e"), ("Ì", "i"), ("Ò", "o"), ("Ù", "u"),
        ("à", "a"), ("è", "e"), ("ì", "i"), ("ò", "o"), ("ù", "u"),
        ("ç", "c"), ("Ç", "C"),
        ("Ñ", "n"), ("ñ", "n"),
        ("/", "-"),
        # HTML escaped ampersand; prevents "amp" from leaking into the slug
        ("&amp;", "&"),
    )
    for original, plain in replacements:
        title = title.replace(original, plain)

    # To lowercase
    title = title.lower().strip()

    # Removes the invalid characters
    validchars = "abcdefghijklmnopqrstuvwxyz1234567890- "
    title = ''.join(c for c in title if c in validchars)

    # Collapses duplicated blanks
    title = re.sub(r"\s+", " ", title)

    # Replaces blanks by hyphens
    title = re.sub(r"\s", "-", title.strip())

    # Collapses duplicated hyphens
    title = re.sub(r"\-+", "-", title)

    # Fixes special cases
    if title.startswith("-"):
        title = title[1:]

    if title == "":
        title = "-" + str(time.time())

    return title
|
||||
|
||||
|
||||
def remove_htmltags(string):
    """
    Remove from a text everything that looks like an HTML tag.

    A tag is a "<", followed by at least one character other than "<", up to
    the nearest ">".

    @param string: text to clean
    @return: the text without the tags
    """
    tag_pattern = re.compile('<[^<]+?>')
    return tag_pattern.sub('', string)
|
||||
|
||||
|
||||
def remove_show_from_title(title, show):
    """
    Remove the name of the show from the beginning of a title.

    The title is only modified when its slug starts with the slug of the show.
    In that case as many characters as the show name has are dropped from the
    start, then the surrounding blanks and one leading hyphen; if nothing is
    left the current timestamp is used instead.

    @param title: title, as a UTF-8 encoded string
    @param show: name of the show, as a UTF-8 encoded string
    @return: the title without the show name (UTF-8 encoded), or the title
             unchanged when it does not start with the show name
    """
    if not slugify(title).startswith(slugify(show)):
        return title

    # Convert to unicode first, or the encoding is lost
    decoded_title = unicode(title, "utf-8", "replace")
    decoded_show = unicode(show, "utf-8", "replace")

    remainder = decoded_title[len(decoded_show):].strip()
    if remainder.startswith("-"):
        remainder = remainder[1:].strip()

    if remainder == "":
        remainder = str(time.time())

    # Back to utf-8
    return remainder.encode("utf-8", "ignore")
|
||||
|
||||
|
||||
# scrapertools.get_filename_from_url(media_url)[-4:]
|
||||
def get_filename_from_url(url):
    """
    Return the file name of a url: the text after the last "/" of its path.

    @param url: url to analyse
    @return: last segment of the path, "" when the path ends with "/" or is empty
    """
    parts = urlparse.urlparse(url)
    try:
        path = parts.path
    except:
        # If it fails, the urlparse implementation does not expose attributes
        # such as "path": read the value by position instead
        path = parts[2] if len(parts) >= 4 else ""

    return path.split("/")[-1]
|
||||
|
||||
|
||||
def get_domain_from_url(url):
    """
    Return the network location (host, and port when present) of a url.

    @param url: url to analyse
    @return: the "netloc" component of the url, "" when it has none
    """
    parts = urlparse.urlparse(url)
    try:
        domain = parts.netloc
    except:
        # If it fails, the urlparse implementation does not expose attributes
        # such as "netloc": read the value by position instead
        domain = parts[1] if len(parts) >= 4 else ""

    return domain
|
||||
|
||||
|
||||
def get_season_and_episode(title):
    """
    Return the season and episode number, in "1x01" format, taken from the title of an episode
    Examples of different values for title and the value returned:
        "serie 101x1.strm", "s101e1.avi", "t101e1.avi"  -> '101x01'
        "Name TvShow 1x6.avi" -> '1x06'
        "Temp 3 episodio 2.avi" -> '3x02'
        "Alcantara season 13 episodie 12.avi" -> '13x12'
        "Temp1 capitulo 14" -> '1x14'
        "Temporada 1: El origen Episodio 9" -> '' (no other text is allowed between the season number and the episodes)
        "Episodio 25: titulo episodio" -> '' (there is no season number)
        "Serie X Temporada 1" -> '' (there is no episode number)
    @type title: str
    @param title: title of the episode of a series
    @rtype: str
    @return: Season and episode number in "1x01" format, or an empty string if they have not been found
    """
    patrons = (r"(\d+)x(\d+)",
               r"(?:s|t)(\d+)e(\d+)",
               r"(?:season|temp\w*)\s*(\d+)\s*(?:capitulo|epi\w*)\s*(\d+)")

    filename = ""
    for patron in patrons:
        try:
            found = re.search(patron, title, re.I)
            if found:
                # The episode number is padded to two digits
                filename = "x".join((found.group(1), found.group(2).zfill(2)))
                break
        except:
            # A failing pattern is skipped and the next one is tried
            continue

    logger.info("'" + title + "' -> '" + filename + "'")

    return filename
|
||||
|
||||
|
||||
def get_sha1(cadena):
    """
    Return the hexadecimal SHA-1 digest of a piece of data.

    The legacy ``sha`` module is only used when ``hashlib`` cannot be imported;
    errors raised while hashing (for example an argument of the wrong type) are
    propagated unchanged instead of being hidden behind the fallback import.

    @param cadena: data to hash (hashlib expects a byte string)
    @rtype: str
    @return: hexadecimal representation of the SHA-1 digest
    """
    try:
        import hashlib
    except ImportError:
        # Very old interpreters ship without hashlib: use the legacy module
        import sha
        import binascii
        return binascii.hexlify(sha.new(cadena).digest())

    return hashlib.sha1(cadena).hexdigest()
|
||||
|
||||
|
||||
def get_md5(cadena):
    """
    Return the hexadecimal MD5 digest of a piece of data.

    The legacy ``md5`` module is only used when ``hashlib`` cannot be imported;
    errors raised while hashing (for example an argument of the wrong type) are
    propagated unchanged instead of being hidden behind the fallback import.

    @param cadena: data to hash (hashlib expects a byte string)
    @rtype: str
    @return: hexadecimal representation of the MD5 digest
    """
    try:
        import hashlib
    except ImportError:
        # Very old interpreters ship without hashlib: use the legacy module
        import md5
        import binascii
        return binascii.hexlify(md5.new(cadena).digest())

    return hashlib.md5(cadena).hexdigest()
|
||||
+2
-2
@@ -506,8 +506,8 @@ def get_server_json(server_name):
|
||||
|
||||
|
||||
def get_server_host(server_name):
|
||||
from core import scrapertoolsV2
|
||||
return [scrapertoolsV2.get_domain_from_url(pattern['url']) for pattern in get_server_json(server_name)['find_videos']['patterns']]
|
||||
from core import scrapertools
|
||||
return [scrapertools.get_domain_from_url(pattern['url']) for pattern in get_server_json(server_name)['find_videos']['patterns']]
|
||||
|
||||
|
||||
def get_server_controls_settings(server_name):
|
||||
|
||||
+60
-36
@@ -10,7 +10,7 @@ import urlparse
|
||||
import xbmcaddon
|
||||
|
||||
from channelselector import thumb
|
||||
from core import httptools, scrapertoolsV2, servertools, tmdb, channeltools
|
||||
from core import httptools, scrapertools, servertools, tmdb, channeltools
|
||||
from core.item import Item
|
||||
from lib import unshortenit
|
||||
from platformcode import logger, config
|
||||
@@ -21,7 +21,7 @@ def hdpass_get_servers(item):
|
||||
itemlist = []
|
||||
data = httptools.downloadpage(item.url).data.replace('\n', '')
|
||||
patron = r'<iframe(?: id="[^"]+")? width="[^"]+" height="[^"]+" src="([^"]+)"[^>]+><\/iframe>'
|
||||
url = scrapertoolsV2.find_single_match(data, patron).replace("?alta", "")
|
||||
url = scrapertools.find_single_match(data, patron).replace("?alta", "")
|
||||
url = url.replace("&download=1", "")
|
||||
if 'https' not in url:
|
||||
url = 'https:' + url
|
||||
@@ -37,20 +37,20 @@ def hdpass_get_servers(item):
|
||||
patron_mir = '<div class="row mobileMirrs">(.*?)</div>'
|
||||
patron_media = r'<input type="hidden" name="urlEmbed" data-mirror="([^"]+)" id="urlEmbed"\s*value="([^"]+)"\s*/>'
|
||||
|
||||
res = scrapertoolsV2.find_single_match(data, patron_res)
|
||||
res = scrapertools.find_single_match(data, patron_res)
|
||||
|
||||
itemlist = []
|
||||
|
||||
for res_url, res_video in scrapertoolsV2.find_multiple_matches(res, '<option.*?value="([^"]+?)">([^<]+?)</option>'):
|
||||
for res_url, res_video in scrapertools.find_multiple_matches(res, '<option.*?value="([^"]+?)">([^<]+?)</option>'):
|
||||
|
||||
data = httptools.downloadpage(urlparse.urljoin(url, res_url)).data.replace('\n', '')
|
||||
|
||||
mir = scrapertoolsV2.find_single_match(data, patron_mir)
|
||||
mir = scrapertools.find_single_match(data, patron_mir)
|
||||
|
||||
for mir_url, srv in scrapertoolsV2.find_multiple_matches(mir, '<option.*?value="([^"]+?)">([^<]+?)</value>'):
|
||||
for mir_url, srv in scrapertools.find_multiple_matches(mir, '<option.*?value="([^"]+?)">([^<]+?)</value>'):
|
||||
|
||||
data = httptools.downloadpage(urlparse.urljoin(url, mir_url)).data.replace('\n', '')
|
||||
for media_label, media_url in scrapertoolsV2.find_multiple_matches(data, patron_media):
|
||||
for media_label, media_url in scrapertools.find_multiple_matches(data, patron_media):
|
||||
itemlist.append(Item(channel=item.channel,
|
||||
action="play",
|
||||
fulltitle=item.fulltitle,
|
||||
@@ -168,13 +168,13 @@ def scrapeLang(scraped, lang, longtitle):
|
||||
return language, longtitle
|
||||
|
||||
def cleantitle(title):
|
||||
cleantitle = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(title).replace('"', "'").replace('×', 'x').replace('–', '-')).strip()
|
||||
cleantitle = scrapertools.htmlclean(scrapertools.decodeHtmlentities(title).replace('"', "'").replace('×', 'x').replace('–', '-')).strip()
|
||||
return cleantitle
|
||||
|
||||
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang):
|
||||
itemlist = []
|
||||
log("scrapeBlock qui", block, patron)
|
||||
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
|
||||
log("scrapeBlock qui")
|
||||
matches = scrapertools.find_multiple_matches_groups(block, patron)
|
||||
log('MATCHES =', matches)
|
||||
|
||||
if debug:
|
||||
@@ -214,7 +214,7 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
||||
for kk in known_keys:
|
||||
val = match[listGroups.index(kk)] if kk in listGroups else ''
|
||||
if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
|
||||
val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
|
||||
val = scrapertools.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
|
||||
scraped[kk] = val
|
||||
|
||||
if scraped['season']:
|
||||
@@ -227,7 +227,7 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
||||
episode = ''
|
||||
else:
|
||||
episode = re.sub(r'\s-\s|-|x|–|×|×', 'x', scraped['episode']) if scraped['episode'] else ''
|
||||
second_episode = scrapertoolsV2.find_single_match(episode,'x\d+x(\d+)')
|
||||
second_episode = scrapertools.find_single_match(episode, 'x\d+x(\d+)')
|
||||
if second_episode: episode = re.sub(r'(\d+x\d+)x\d+',r'\1-', episode) + second_episode.zfill(2)
|
||||
|
||||
#episode = re.sub(r'\s-\s|-|x|–|×', 'x', scraped['episode']) if scraped['episode'] else ''
|
||||
@@ -257,18 +257,18 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
||||
if scraped["plot"]:
|
||||
infolabels['plot'] = plot
|
||||
if scraped['duration']:
|
||||
matches = scrapertoolsV2.find_multiple_matches(scraped['duration'],
|
||||
matches = scrapertools.find_multiple_matches(scraped['duration'],
|
||||
r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
|
||||
for h, m in matches:
|
||||
scraped['duration'] = int(h) * 60 + int(m)
|
||||
if not matches:
|
||||
scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
|
||||
scraped['duration'] = scrapertools.find_single_match(scraped['duration'], r'(\d+)')
|
||||
infolabels['duration'] = int(scraped['duration']) * 60
|
||||
if scraped['genere']:
|
||||
genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
|
||||
genres = scrapertools.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
|
||||
infolabels['genere'] = ", ".join(genres)
|
||||
if scraped["rating"]:
|
||||
infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
|
||||
infolabels['rating'] = scrapertools.decodeHtmlentities(scraped["rating"])
|
||||
|
||||
AC = CT = ''
|
||||
if typeContentDict:
|
||||
@@ -377,7 +377,18 @@ def scrape(func):
|
||||
|
||||
log('PATRON= ', patron)
|
||||
if not data:
|
||||
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, session=item.session).data.replace("'", '"')
|
||||
page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, session=item.session)
|
||||
# if url may be changed and channel has findhost to update
|
||||
if (not page.data or scrapertools.get_domain_from_url(page.url) != scrapertools.get_domain_from_url(item.url)) and 'findhost' in func.__globals__:
|
||||
host = func.__globals__['findhost']()
|
||||
parse = list(urlparse.urlparse(item.url))
|
||||
from core import jsontools
|
||||
jsontools.update_node(host, func.__module__.split('.')[-1], 'url')
|
||||
parse[1] = scrapertools.get_domain_from_url(host)
|
||||
item.url = urlparse.urlunparse(parse)
|
||||
page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True,
|
||||
session=item.session)
|
||||
data = page.data.replace("'", '"')
|
||||
data = re.sub('\n|\t', ' ', data)
|
||||
data = re.sub(r'>\s+<', '> <', data)
|
||||
# replace all ' with " and eliminate newline, so we don't need to worry about
|
||||
@@ -385,7 +396,7 @@ def scrape(func):
|
||||
if patronBlock:
|
||||
if debugBlock:
|
||||
regexDbg(item, patronBlock, headers, data)
|
||||
blocks = scrapertoolsV2.find_multiple_matches_groups(data, patronBlock)
|
||||
blocks = scrapertools.find_multiple_matches_groups(data, patronBlock)
|
||||
block = ""
|
||||
for bl in blocks:
|
||||
# log(len(blocks),bl)
|
||||
@@ -434,7 +445,7 @@ def scrape(func):
|
||||
if anime:
|
||||
if function == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
|
||||
else: autorenumber.renumber(itemlist)
|
||||
if anime and autorenumber.check(item) == False and not scrapertoolsV2.find_single_match(itemlist[0].title, r'(\d+.\d+)'):
|
||||
if anime and autorenumber.check(item) == False and not scrapertools.find_single_match(itemlist[0].title, r'(\d+.\d+)'):
|
||||
pass
|
||||
else:
|
||||
if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
|
||||
@@ -462,7 +473,7 @@ def dooplay_get_links(item, host):
|
||||
|
||||
data = httptools.downloadpage(item.url).data.replace("'", '"')
|
||||
patron = r'<li id="player-option-[0-9]".*?data-type="([^"]+)" data-post="([^"]+)" data-nume="([^"]+)".*?<span class="title".*?>([^<>]+)</span>(?:<span class="server">([^<>]+))?'
|
||||
matches = scrapertoolsV2.find_multiple_matches(data, patron)
|
||||
matches = scrapertools.find_multiple_matches(data, patron)
|
||||
|
||||
ret = []
|
||||
|
||||
@@ -474,7 +485,7 @@ def dooplay_get_links(item, host):
|
||||
"type": type
|
||||
})
|
||||
dataAdmin = httptools.downloadpage(host + '/wp-admin/admin-ajax.php', post=postData,headers={'Referer': item.url}).data
|
||||
link = scrapertoolsV2.find_single_match(dataAdmin, "<iframe.*src='([^']+)'")
|
||||
link = scrapertools.find_single_match(dataAdmin, "<iframe.*src='([^']+)'")
|
||||
ret.append({
|
||||
'url': link,
|
||||
'title': title,
|
||||
@@ -551,25 +562,25 @@ def swzz_get_url(item):
|
||||
if "/link/" in item.url:
|
||||
data = httptools.downloadpage(item.url, headers=headers).data
|
||||
if "link =" in data:
|
||||
data = scrapertoolsV2.find_single_match(data, 'link = "([^"]+)"')
|
||||
data = scrapertools.find_single_match(data, 'link = "([^"]+)"')
|
||||
if 'http' not in data:
|
||||
data = 'https:' + data
|
||||
else:
|
||||
match = scrapertoolsV2.find_single_match(data, r'<meta name="og:url" content="([^"]+)"')
|
||||
match = scrapertoolsV2.find_single_match(data, r'URL=([^"]+)">') if not match else match
|
||||
match = scrapertools.find_single_match(data, r'<meta name="og:url" content="([^"]+)"')
|
||||
match = scrapertools.find_single_match(data, r'URL=([^"]+)">') if not match else match
|
||||
|
||||
if not match:
|
||||
from lib import jsunpack
|
||||
|
||||
try:
|
||||
data = scrapertoolsV2.find_single_match(data.replace('\n', ''), r"(eval\s?\(function\(p,a,c,k,e,d.*?)</script>")
|
||||
data = scrapertools.find_single_match(data.replace('\n', ''), r"(eval\s?\(function\(p,a,c,k,e,d.*?)</script>")
|
||||
data = jsunpack.unpack(data)
|
||||
|
||||
logger.debug("##### play /link/ unpack ##\n%s\n##" % data)
|
||||
except:
|
||||
logger.debug("##### The content is yet unpacked ##\n%s\n##" % data)
|
||||
|
||||
data = scrapertoolsV2.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";')
|
||||
data = scrapertools.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";')
|
||||
data, c = unshortenit.unwrap_30x_only(data)
|
||||
else:
|
||||
data = match
|
||||
@@ -626,8 +637,8 @@ def menu(func):
|
||||
|
||||
item = args['item']
|
||||
host = func.__globals__['host']
|
||||
list_servers = func.__globals__['list_servers']
|
||||
list_quality = func.__globals__['list_quality']
|
||||
list_servers = func.__globals__['list_servers'] if 'list_servers' in func.__globals__ else 'directo'
|
||||
list_quality = func.__globals__['list_quality'] if 'list_quality' in func.__globals__ else 'default'
|
||||
filename = func.__module__.split('.')[1]
|
||||
global_search = False
|
||||
# listUrls = ['film', 'filmSub', 'tvshow', 'tvshowSub', 'anime', 'animeSub', 'search', 'top', 'topSub']
|
||||
@@ -744,7 +755,7 @@ def typo(string, typography=''):
|
||||
if 'submenu' in string:
|
||||
string = u"\u2022\u2022 ".encode('utf-8') + re.sub(r'\ssubmenu','',string)
|
||||
if 'color' in string:
|
||||
color = scrapertoolsV2.find_single_match(string,'color ([a-z]+)')
|
||||
color = scrapertools.find_single_match(string, 'color ([a-z]+)')
|
||||
if color == 'kod' or '': color = kod_color
|
||||
string = '[COLOR '+ color +']' + re.sub(r'\scolor\s([a-z]+)','',string) + '[/COLOR]'
|
||||
if 'bold' in string:
|
||||
@@ -776,13 +787,13 @@ def match(item, patron='', patronBlock='', headers='', url='', post=''):
|
||||
log('DATA= ', data)
|
||||
|
||||
if patronBlock:
|
||||
block = scrapertoolsV2.find_single_match(data, patronBlock)
|
||||
block = scrapertools.find_single_match(data, patronBlock)
|
||||
log('BLOCK= ',block)
|
||||
else:
|
||||
block = data
|
||||
|
||||
if patron:
|
||||
matches = scrapertoolsV2.find_multiple_matches(block, patron)
|
||||
matches = scrapertools.find_multiple_matches(block, patron)
|
||||
log('MATCHES= ',matches)
|
||||
|
||||
return matches, block
|
||||
@@ -890,12 +901,12 @@ def nextPage(itemlist, item, data='', patron='', function_or_level=1, next_page=
|
||||
# If the call is direct, leave it blank
|
||||
action = inspect.stack()[function_or_level][3] if type(function_or_level) == int else function_or_level
|
||||
if next_page == '':
|
||||
next_page = scrapertoolsV2.find_single_match(data, patron)
|
||||
next_page = scrapertools.find_single_match(data, patron)
|
||||
|
||||
if next_page != "":
|
||||
if resub: next_page = re.sub(resub[0], resub[1], next_page)
|
||||
if 'http' not in next_page:
|
||||
next_page = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + next_page
|
||||
next_page = scrapertools.find_single_match(item.url, 'https?://[a-z0-9.-]+') + next_page
|
||||
next_page = re.sub('&', '&',next_page)
|
||||
log('NEXT= ', next_page)
|
||||
itemlist.append(
|
||||
@@ -970,6 +981,7 @@ def controls(itemlist, item, AutoPlay=True, CheckLinks=True, down_load=True):
|
||||
channel_node = autoplay_node.get(item.channel, {})
|
||||
settings_node = channel_node.get('settings', {})
|
||||
AP = get_setting('autoplay') or settings_node['active']
|
||||
APS = get_setting('autoplay_server_list')
|
||||
|
||||
if CL and not AP:
|
||||
if get_setting('checklinks', item.channel):
|
||||
@@ -982,15 +994,27 @@ def controls(itemlist, item, AutoPlay=True, CheckLinks=True, down_load=True):
|
||||
checklinks_number = get_setting('checklinks_number')
|
||||
itemlist = servertools.check_list_links(itemlist, checklinks_number)
|
||||
|
||||
if AutoPlay == True and inspect.stack()[4][3] != 'start_download':
|
||||
if AutoPlay == True and not 'downloads' in inspect.stack()[3][1] + inspect.stack()[4][1]:
|
||||
autoplay.start(itemlist, item)
|
||||
|
||||
if item.contentChannel != 'videolibrary': videolibrary(itemlist, item, function_level=3)
|
||||
if get_setting('downloadenabled') and down_load == True: download(itemlist, item, function_level=3)
|
||||
return itemlist
|
||||
|
||||
VL = False
|
||||
try:
|
||||
if 'downloads' in inspect.stack()[3][1] + inspect.stack()[4][1] or \
|
||||
inspect.stack()[4][3] == 'play_from_library' or \
|
||||
inspect.stack()[5][3] == 'play_from_library' or \
|
||||
'videolibrary' in inspect.stack()[3][1] or \
|
||||
'videolibrary' in inspect.stack()[4][1]:
|
||||
VL = True
|
||||
except:
|
||||
pass
|
||||
if not AP or VL or not APS:
|
||||
return itemlist
|
||||
|
||||
def filterLang(item, itemlist):
|
||||
import channeltools
|
||||
# import channeltools
|
||||
list_language = channeltools.get_lang(item.channel)
|
||||
if len(list_language) > 1:
|
||||
from specials import filtertools
|
||||
|
||||
Reference in New Issue
Block a user