fix youtube (trailer)

This commit is contained in:
alfa-addon
2020-02-26 21:04:38 +01:00
committed by marco
parent e0820ab0f7
commit 685c2747bd
2 changed files with 371 additions and 101 deletions
+249
View File
@@ -0,0 +1,249 @@
# -*- coding: utf-8 -*-
import json
import operator
import re
_OPERATORS = [
('|', operator.or_),
('^', operator.xor),
('&', operator.and_),
('>>', operator.rshift),
('<<', operator.lshift),
('-', operator.sub),
('+', operator.add),
('%', operator.mod),
('/', operator.truediv),
('*', operator.mul),
]
_ASSIGN_OPERATORS = []
for op, opfunc in _OPERATORS:
_ASSIGN_OPERATORS.append([op + '=', opfunc])
_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
class JSInterpreter(object):
def __init__(self, code, objects=None):
if objects is None:
objects = {}
self.code = code
self._functions = {}
self._objects = objects
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
should_abort = False
stmt = stmt.lstrip()
stmt_m = re.match(r'var\s', stmt)
if stmt_m:
expr = stmt[len(stmt_m.group(0)):]
else:
return_m = re.match(r'return(?:\s+|$)', stmt)
if return_m:
expr = stmt[len(return_m.group(0)):]
should_abort = True
else:
# Try interpreting it as an expression
expr = stmt
v = self.interpret_expression(expr, local_vars, allow_recursion)
return v, should_abort
def interpret_expression(self, expr, local_vars, allow_recursion):
expr = expr.strip()
if expr == '': # Empty expression
return None
if expr.startswith('('):
parens_count = 0
for m in re.finditer(r'[()]', expr):
if m.group(0) == '(':
parens_count += 1
else:
parens_count -= 1
if parens_count == 0:
sub_expr = expr[1:m.start()]
sub_result = self.interpret_expression(
sub_expr, local_vars, allow_recursion)
remaining_expr = expr[m.end():].strip()
if not remaining_expr:
return sub_result
else:
expr = json.dumps(sub_result) + remaining_expr
break
for op, opfunc in _ASSIGN_OPERATORS:
m = re.match(r'''(?x)
(?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
\s*%s
(?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
if not m:
continue
right_val = self.interpret_expression(
m.group('expr'), local_vars, allow_recursion - 1)
if m.groupdict().get('index'):
lvar = local_vars[m.group('out')]
idx = self.interpret_expression(
m.group('index'), local_vars, allow_recursion)
assert isinstance(idx, int)
cur = lvar[idx]
val = opfunc(cur, right_val)
lvar[idx] = val
return val
else:
cur = local_vars.get(m.group('out'))
val = opfunc(cur, right_val)
local_vars[m.group('out')] = val
return val
if expr.isdigit():
return int(expr)
var_m = re.match(
r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
expr)
if var_m:
return local_vars[var_m.group('name')]
try:
return json.loads(expr)
except ValueError:
pass
m = re.match(
r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
expr)
if m:
variable = m.group('var')
member = m.group('member')
arg_str = m.group('args')
if variable in local_vars:
obj = local_vars[variable]
else:
if variable not in self._objects:
self._objects[variable] = self.extract_object(variable)
obj = self._objects[variable]
if arg_str is None:
# Member access
if member == 'length':
return len(obj)
return obj[member]
assert expr.endswith(')')
# Function call
if arg_str == '':
argvals = tuple()
else:
argvals = []
for v in arg_str.split(','):
argvals.extend([self.interpret_expression(v, local_vars, allow_recursion)])
if member == 'split':
assert argvals == ('',)
return list(obj)
if member == 'join':
assert len(argvals) == 1
return argvals[0].join(obj)
if member == 'reverse':
assert len(argvals) == 0
obj.reverse()
return obj
if member == 'slice':
assert len(argvals) == 1
return obj[argvals[0]:]
if member == 'splice':
assert isinstance(obj, list)
index, howMany = argvals
res = []
for i in range(index, min(index + howMany, len(obj))):
res.append(obj.pop(index))
return res
return obj[member](argvals)
m = re.match(
r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
if m:
val = local_vars[m.group('in')]
idx = self.interpret_expression(
m.group('idx'), local_vars, allow_recursion - 1)
return val[idx]
for op, opfunc in _OPERATORS:
m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
if not m:
continue
x, abort = self.interpret_statement(
m.group('x'), local_vars, allow_recursion - 1)
y, abort = self.interpret_statement(
m.group('y'), local_vars, allow_recursion - 1)
return opfunc(x, y)
m = re.match(
r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
if m:
fname = m.group('func')
argvals = []
for v in m.group('args').split(','):
if v.isdigit():
argvals.append([int(v)])
else:
argvals.append([local_vars[v]])
if fname not in self._functions:
self._functions[fname] = self.extract_function(fname)
return self._functions[fname](argvals)
def extract_object(self, objname):
obj = {}
obj_m = re.search(
(r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
r'\}\s*;',
self.code)
fields = obj_m.group('fields')
# Currently, it only supports function definitions
fields_m = re.finditer(
r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
fields)
for f in fields_m:
argnames = f.group('args').split(',')
obj[f.group('key')] = self.build_function(argnames, f.group('code'))
return obj
def extract_function(self, funcname):
func_m = re.search(
r'''(?x)
(?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
\((?P<args>[^)]*)\)\s*
\{(?P<code>[^}]+)\}''' % (
re.escape(funcname), re.escape(funcname), re.escape(funcname)),
self.code)
argnames = func_m.group('args').split(',')
return self.build_function(argnames, func_m.group('code'))
def call_function(self, funcname, *args):
f = self.extract_function(funcname)
return f(args)
def build_function(self, argnames, code):
def resf(args):
local_vars = dict(zip(argnames, args))
for stmt in code.split(';'):
res, abort = self.interpret_statement(stmt, local_vars)
if abort:
break
return res
return resf
+122 -101
View File
@@ -1,12 +1,19 @@
# s-*- coding: utf-8 -*- # s-*- coding: utf-8 -*-
import re import sys
import urllib PY3 = False
if sys.version_info[0] >= 3: PY3 = True; unicode = str; unichr = chr; long = int
try: if PY3:
import urlparse #from future import standard_library
except: #standard_library.install_aliases()
import urllib.parse as urllib # Es muy lento en PY2. En PY3 es nativo
import urllib.parse as urlparse import urllib.parse as urlparse
else:
import urllib # Usamos el nativo de PY2 que es más rápido
import urlparse
import re
from core import httptools from core import httptools
from core import jsontools as json from core import jsontools as json
@@ -14,6 +21,72 @@ from core import scrapertools
from platformcode import config, logger from platformcode import config, logger
# Display labels for YouTube stream format ids ("itag"), keyed by the integer
# id. Values read "<container> <resolution>" with an optional suffix such as
# "3D" or "hdr".
itag_list = {
    1: "video",
    5: "flv 240p",
    6: "flv 270p",
    17: "3gp 144p",
    18: "mp4 360p",
    22: "mp4 720p",
    34: "flv 360p",
    35: "flv 480p",
    36: "3gp 180p",
    37: "mp4 1080p",
    38: "mp4 3072p",
    43: "webm 360p",
    44: "webm 480p",
    45: "webm 720p",
    46: "webm 1080p",
    82: "mp4 360p 3D",
    83: "mp4 480p 3D",
    84: "mp4 720p 3D",
    85: "mp4 1080p 3D",
    92: "hls 240p 3D",
    93: "hls 360p 3D",
    94: "hls 480p 3D",
    95: "hls 720p 3D",
    96: "hls 1080p",
    100: "webm 360p 3D",
    101: "webm 480p 3D",
    102: "webm 720p 3D",
    132: "hls 240p",
    133: "mp4 240p",
    134: "mp4 360p",
    135: "mp4 480p",
    136: "mp4 720p",
    137: "mp4 1080p",
    138: "mp4 2160p",
    160: "mp4 144p",
    167: "webm 360p",
    168: "webm 480p",
    169: "webm 1080p",
    219: "webm 144p",
    242: "webm 240p",
    243: "webm 360p",
    244: "webm 480p",
    245: "webm 480p",
    246: "webm 480p",
    247: "webm 720p",
    248: "webm 1080p",
    266: "mp4 2160p",
    271: "webm 1440p",
    272: "webm 4320p",
    278: "webm 144p",
    298: "mp4 720p",
    299: "mp4 1080p",
    302: "webm 720p",
    303: "webm 1080p",
    308: "webm 1440p",
    313: "webm 2160p",
    315: "webm 2160p",
    330: "webm 144p hdr",
    331: "webm 240p hdr",
    332: "webm 360p hdr",
    333: "webm 480p hdr",
    334: "webm 720p hdr",
    335: "webm 1080p hdr",
    336: "webm 1440p hdr",
}
def test_video_exists(page_url): def test_video_exists(page_url):
logger.info("(page_url='%s')" % page_url) logger.info("(page_url='%s')" % page_url)
@@ -21,11 +94,9 @@ def test_video_exists(page_url):
if "File was deleted" in data: if "File was deleted" in data:
return False, config.get_localized_string(70449) % "Youtube" return False, config.get_localized_string(70449) % "Youtube"
return True, "" return True, ""
def get_video_url(page_url, premium=False, user="", password="", video_password=""): def get_video_url(page_url, premium=False, user="", password="", video_password=""):
logger.info("(page_url='%s')" % page_url) logger.info("(page_url='%s')" % page_url)
@@ -35,10 +106,6 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
video_id = scrapertools.find_single_match(page_url, '(?:v=|embed/)([A-z0-9_-]{11})') video_id = scrapertools.find_single_match(page_url, '(?:v=|embed/)([A-z0-9_-]{11})')
video_urls = extract_videos(video_id) video_urls = extract_videos(video_id)
video_urls.reverse()
for video_url in video_urls:
logger.info(str(video_url))
return video_urls return video_urls
@@ -86,48 +153,37 @@ def extract_flashvars(data):
return flashvars return flashvars
# Locate the signature-deciphering routine of the YouTube player script and
# return it as a Python callable.
#
# youtube_page_data: page text in which the player script URL is looked up
#   through its '"assets": ... "js": "<url>"' entry.
# Returns the callable built by JSInterpreter.extract_function; extract_videos
#   invokes it as signature([s]), i.e. with a one-element list holding the
#   ciphered signature.
# Raises AttributeError when the pattern below matches nothing in the script,
#   because .group() is then called on None.
# NOTE(review): indentation is lost in this view, so it is unclear which of
#   the statements below are guarded by `if urljs:` - confirm what happens
#   when the player script URL is not found.
def get_signature(youtube_page_data):
# Imported inside the function rather than at module level.
from lib.jsinterpreter import JSInterpreter
urljs = scrapertools.find_single_match(youtube_page_data, '"assets":.*?"js":\s*"([^"]+)"')
# The URL comes out of an escaped string: drop the backslashes.
urljs = urljs.replace("\\", "")
if urljs:
# A URL without scheme is resolved against the YouTube site root.
if not re.search(r'https?://', urljs):
urljs = urlparse.urljoin("https://www.youtube.com", urljs)
data_js = httptools.downloadpage(urljs).data
# Matches NAME=function(x){x=x.split(""); ... return x.join("")} and captures
# NAME; \2 forces the split/join variable to be the same one.
pattern = r'(?P<fname>\w+)=function\(\w+\){(\w)=\2\.split\(""\);.*?return\s+\2\.join\(""\)}'
funcname = re.search(pattern, data_js).group('fname')
jsi = JSInterpreter(data_js)
js_signature = jsi.extract_function(funcname)
return js_signature
def extract_videos(video_id): def extract_videos(video_id):
fmt_value = {
5: "240p h263 flv",
6: "270p h263 flv",
18: "360p h264 mp4",
22: "720p h264 mp4",
26: "???",
33: "???",
34: "360p h264 flv",
35: "480p h264 flv",
36: "3gpp",
37: "1080p h264 mp4",
38: "4K h264 mp4",
43: "360p vp8 webm",
44: "480p vp8 webm",
45: "720p vp8 webm",
46: "1080p vp8 webm",
59: "480p h264 mp4",
78: "480p h264 mp4",
82: "360p h264 3D",
83: "480p h264 3D",
84: "720p h264 3D",
85: "1080p h264 3D",
100: "360p vp8 3D",
101: "480p vp8 3D",
102: "720p vp8 3D",
91:"144 h264 mp4",
92:"240 h264 mp4",
93:"360 h264 mp4",
94:"480 h264 mp4",
95:"720 h264 mp4",
96:"1080 h264 mp4",
132:"240 h264 mp4",
151:"72 h264 mp4"
}
# from core.support import dbg; dbg()
url = 'https://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % \ url = 'https://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % \
(video_id, video_id) (video_id, video_id)
data = httptools.downloadpage(url).data data = httptools.downloadpage(url).data
video_urls = [] video_urls = []
params = dict(urlparse.parse_qsl(data)) params = dict(urlparse.parse_qsl(data))
if params.get('hlsvp'): if params.get('hlsvp'):
video_urls.append(["(LIVE .m3u8) [youtube]", params['hlsvp']]) video_urls.append(["(LIVE .m3u8) [youtube]", params['hlsvp']])
return video_urls return video_urls
@@ -140,62 +196,27 @@ def extract_videos(video_id):
if params.get('use_cipher_signature', '') != 'True': if params.get('use_cipher_signature', '') != 'True':
video_urls.append(['mpd HD [youtube]', params['dashmpd'], 0, '', True]) video_urls.append(['mpd HD [youtube]', params['dashmpd'], 0, '', True])
js_signature = "" youtube_page_data = httptools.downloadpage("https://www.youtube.com/watch?v=%s" % video_id).data
youtube_page_data = httptools.downloadpage("http://www.youtube.com/watch?v=%s" % video_id).data
params = extract_flashvars(youtube_page_data) params = extract_flashvars(youtube_page_data)
data_flashvars =[]
if params.get('adaptive_fmts'):
data_flashvars += scrapertools.find_multiple_matches(params['adaptive_fmts'], '(fps.*?url[^,]+)')
if params.get('url_encoded_fmt_stream_map'):
data_flashvars += params["url_encoded_fmt_stream_map"].split(",")
for url_desc in data_flashvars: if params.get('player_response'):
url_desc_map = dict(urlparse.parse_qsl(url_desc)) params = json.load(params.get('player_response'))
if not url_desc_map.get("url") and not url_desc_map.get("stream"): data_flashvars = params["streamingData"]
continue for s_data in data_flashvars:
try: if s_data in ["adaptiveFormats", "formats"]:
key = int(url_desc_map["itag"]) for opt in data_flashvars[s_data]:
if not fmt_value.get(key): opt = dict(opt)
continue if "audioQuality" not in opt:
continue
if url_desc_map.get("url"): if "cipher" in opt:
url = urllib.unquote(url_desc_map["url"]) signature = get_signature(youtube_page_data)
elif url_desc_map.get("conn") and url_desc_map.get("stream"): cipher = dict(urlparse.parse_qsl(urllib.unquote(opt["cipher"])))
url = urllib.unquote(url_desc_map["conn"]) url = re.search('url=(.*)', opt["cipher"]).group(1)
if url.rfind("/") < len(url) - 1: s = cipher.get('s')
url += "/" url = "%s&sig=%s" % (urllib.unquote(url), signature([s]))
url += urllib.unquote(url_desc_map["stream"]) video_urls.append(["%s" % itag_list.get(opt["itag"], "video"), url])
elif url_desc_map.get("stream") and not url_desc_map.get("conn"): elif opt["itag"] in itag_list:
url = urllib.unquote(url_desc_map["stream"]) video_urls.append(["%s" % itag_list.get(opt["itag"], "video"), opt["url"]])
if url_desc_map.get("sig"):
url += "&signature=" + url_desc_map["sig"]
elif url_desc_map.get("s"):
sig = url_desc_map["s"]
if not js_signature:
urljs = scrapertools.find_single_match(youtube_page_data, '"assets":.*?"js":\s*"([^"]+)"')
urljs = urljs.replace("\\", "")
if urljs:
if not re.search(r'https?://', urljs):
urljs = urlparse.urljoin("https://www.youtube.com", urljs)
data_js = httptools.downloadpage(urljs).data
from jsinterpreter import JSInterpreter
funcname = scrapertools.find_single_match(data_js, '\.sig\|\|([A-z0-9$]+)\(')
if not funcname:
funcname = scrapertools.find_single_match(data_js, '["\']signature["\']\s*,\s*'
'([A-z0-9$]+)\(')
if not funcname:
funcname = scrapertools.find_single_match(data_js, r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
jsi = JSInterpreter(data_js)
js_signature = jsi.extract_function(funcname)
signature = js_signature([sig])
url += "&sig=" + signature
url = url.replace(",", "%2C")
video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
except:
import traceback
logger.info(traceback.format_exc())
return video_urls return video_urls