From 3c7a2cef98be62f3930480b42d96972b25fcd9b8 Mon Sep 17 00:00:00 2001
From: Intel1 <25161862+Intel11@users.noreply.github.com>
Date: Tue, 15 May 2018 11:11:56 -0500
Subject: [PATCH] Update unshortenit.py

---
 plugin.video.alfa/lib/unshortenit.py | 68 ----------------------------
 1 file changed, 68 deletions(-)

diff --git a/plugin.video.alfa/lib/unshortenit.py b/plugin.video.alfa/lib/unshortenit.py
index ee953a06..07141af1 100755
--- a/plugin.video.alfa/lib/unshortenit.py
+++ b/plugin.video.alfa/lib/unshortenit.py
@@ -37,21 +37,15 @@ class UnshortenIt(object):
     _anonymz_regex = r'anonymz\.com'
     _shrink_service_regex = r'shrink-service\.it'
     _rapidcrypt_regex = r'rapidcrypt\.net'
-
     _maxretries = 5
-
     _this_dir, _this_filename = os.path.split(__file__)
     _timeout = 10
 
     def unshorten(self, uri, type=None):
-
         domain = urlsplit(uri).netloc
-
         if not domain:
             return uri, "No domain found in URI!"
-
         had_google_outbound, uri = self._clear_google_outbound_proxy(uri)
-
         if re.search(self._adfly_regex, domain, re.IGNORECASE) or type == 'adfly':
             return self._unshorten_adfly(uri)
 
@@ -74,21 +68,15 @@ class UnshortenIt(object):
             return self._unshorten_anonymz(uri)
         if re.search(self._rapidcrypt_regex, domain, re.IGNORECASE):
             return self._unshorten_rapidcrypt(uri)
-
         return uri, 200
-
 
     def unwrap_30x(self, uri, timeout=10):
-
         domain = urlsplit(uri).netloc
         self._timeout = timeout
-
         loop_counter = 0
         try:
-
             if loop_counter > 5:
                 raise ValueError("Infinitely looping redirect from URL: '%s'" % (uri,))
-
             # headers stop t.co from working so omit headers if this is a t.co link
             if domain == 't.co':
                 r = httptools.downloadpage(uri, timeout=self._timeout)
@@ -108,7 +96,6 @@ class UnshortenIt(object):
                     only_headers=True)
                 if not r.success:
                     return uri, -1
-
                 retries = 0
                 if 'location' in r.headers and retries < self._maxretries:
                     r = httptools.downloadpage(
@@ -120,10 +107,8 @@ class UnshortenIt(object):
                     retries = retries + 1
                 else:
                     return r.url, r.code
-
         except Exception as e:
             return uri, str(e)
-
     def _clear_google_outbound_proxy(self, url):
         '''
         So google proxies all their outbound links through a redirect so they can detect outbound links. This is annoying. This function removes the proxying, and returns the actual URL.
@@ -132,16 +117,13 @@ class UnshortenIt(object):
         This is useful for doing things like parsing google search results, or if you're scraping google docs, where google inserts hit-counters on all outbound links.
         '''
-
         # This is kind of hacky, because we need to check both the netloc AND
         # part of the path. We could use urllib.parse.urlsplit, but it's
         # easier and just as effective to use string checks.
         if url.startswith("http://www.google.com/url?") or \
                 url.startswith("https://www.google.com/url?"):
-
             qs = urlparse(url).query
             query = parse_qs(qs)
-
             if "q" in query:  # Google doc outbound links (maybe blogspot, too)
                 return True, query["q"].pop()
             elif "url" in query:  # Outbound links from google searches
                 return True, query["url"].pop()
             else:
                 raise ValueError(
                     "Google outbound proxy URL without a target url ('%s')?"
                     % url)
-
         return False, url
 
     def _unshorten_adfly(self, uri):
@@ -163,14 +144,11 @@ class UnshortenIt(object):
         if len(ysmm) > 0:
             ysmm = re.sub(r'var ysmm \= \'|\'\;', '', ysmm[0])
-
             left = ''
             right = ''
-
             for c in [ysmm[i:i + 2] for i in range(0, len(ysmm), 2)]:
                 left += c[0]
                 right = c[1] + right
-
             # Additional digit arithmetic
             encoded_uri = list(left + right)
             numbers = ((i, n) for i, n in enumerate(encoded_uri) if str.isdigit(n))
@@ -178,12 +156,9 @@ class UnshortenIt(object):
                 xor = int(first[1]) ^ int(second[1])
                 if xor < 10:
                     encoded_uri[first[0]] = str(xor)
-
             decoded_uri = b64decode("".join(encoded_uri).encode())[16:-16].decode()
-
             if re.search(r'go\.php\?u\=', decoded_uri):
                 decoded_uri = b64decode(re.sub(r'(.*?)u=', '', decoded_uri)).decode()
-
             return decoded_uri, r.code
         else:
             return uri, 'No ysmm variable found'
@@ -195,23 +170,15 @@ class UnshortenIt(object):
         '''
         (Attempt) to decode linkbucks content.
         HEAVILY based on the OSS jDownloader codebase.
         This has necessidated a license change.
-
         '''
-
         r = httptools.downloadpage(uri, timeout=self._timeout)
-
         firstGet = time.time()
-
         baseloc = r.url
-
         if "/notfound/" in r.url or \
                 "(>Link Not Found<|>The link may have been deleted by the owner|To access the content, you must complete a quick survey\.)" in r.data:
             return uri, 'Error: Link not found or requires a survey!'
-
         link = None
-
         content = r.data
-
         regexes = [
             r".*?/a>.*?",
         ]
 
         scripts = re.findall(r".*?([^<]+)", content)
         if not scripts:
             return uri, "No script bodies found?"
-
         js = False
-
         for script in scripts:
             # cleanup
             script = re.sub(r"[\r\n\s]+\/\/\s*[^\r\n]+", "", script)
             if re.search(r"\s*var\s*f\s*=\s*window\['init'\s*\+\s*'Lb'\s*\+\s*'js'\s*\+\s*''\];[\r\n\s]+", script):
                 js = script
-
         if not js:
             return uri, "Could not find correct script?"
-
         token = find_in_text(r"Token\s*:\s*'([a-f0-9]{40})'", js)
         if not token:
             token = find_in_text(r"\?t=([a-f0-9]{40})", js)
-
         assert token
-
         authKeyMatchStr = r"A(?:'\s*\+\s*')?u(?:'\s*\+\s*')?t(?:'\s*\+\s*')?h(?:'\s*\+\s*')?K(?:'\s*\+\s*')?e(?:'\s*\+\s*')?y"
         l1 = find_in_text(r"\s*params\['" + authKeyMatchStr + r"'\]\s*=\s*(\d+?);", js)
         l2 = find_in_text(
             r"\s*params\['" + authKeyMatchStr + r"'\]\s*=\s?params\['" + authKeyMatchStr + r"'\]\s*\+\s*(\d+?);", js)
-
         if any([not l1, not l2, not token]):
             return uri, "Missing required tokens?"
-
         authkey = int(l1) + int(l2)
-
         p1_url = urljoin(baseloc, "/director/?t={tok}".format(tok=token))
         r2 = httptools.downloadpage(p1_url, timeout=self._timeout)
-
         p1_url = urljoin(baseloc, "/scripts/jquery.js?r={tok}&{key}".format(tok=token, key=l1))
         r2_1 = httptools.downloadpage(p1_url, timeout=self._timeout)
-
         time_left = 5.033 - (time.time() - firstGet)
         xbmc.sleep(max(time_left, 0) * 1000)
-
         p3_url = urljoin(baseloc, "/intermission/loadTargetUrl?t={tok}&aK={key}&a_b=false".format(tok=token, key=str(authkey)))
         r3 = httptools.downloadpage(p3_url, timeout=self._timeout)
-
         resp_json = json.loads(r3.data)
         if "Url" in resp_json:
             return resp_json['Url'], r3.code
-
         return "Wat", "wat"
 
     def inValidate(self, s):
@@ -287,30 +237,23 @@ class UnshortenIt(object):
         # (s == null || s != null && (s.matches("[\r\n\t ]+") || s.equals("") || s.equalsIgnoreCase("about:blank")))
         if not s:
             return True
-
         if re.search("[\r\n\t ]+", s) or s.lower() == "about:blank":
             return True
         else:
             return False
-
     def _unshorten_adfocus(self, uri):
         orig_uri = uri
         try:
-
             r = httptools.downloadpage(uri, timeout=self._timeout)
             html = r.data
-
             adlink = re.findall("click_url =.*;", html)
-
             if len(adlink) > 0:
                 uri = re.sub('^click_url = "|"\;$', '', adlink[0])
                 if re.search(r'http(s|)\://adfoc\.us/serve/skip/\?id\=', uri):
                     http_header = dict()
                     http_header["Host"] = "adfoc.us"
                     http_header["Referer"] = orig_uri
-
                     r = httptools.downloadpage(uri, headers=http_header, timeout=self._timeout)
-
                     uri = r.url
                 return uri, r.code
             else:
@@ -340,20 +283,16 @@ class UnshortenIt(object):
         try:
             r = httptools.downloadpage(uri, timeout=self._timeout)
             html = r.data
-
             session_id = re.findall(r'sessionId\:(.*?)\"\,', html)
             if len(session_id) > 0:
                 session_id = re.sub(r'\s\"', '', session_id[0])
-
                 http_header = dict()
                 http_header["Content-Type"] = "application/x-www-form-urlencoded"
                 http_header["Host"] = "sh.st"
                 http_header["Referer"] = uri
                 http_header["Origin"] = "http://sh.st"
                 http_header["X-Requested-With"] = "XMLHttpRequest"
-
                 xbmc.sleep(5 * 1000)
-
                 payload = {'adSessionId': session_id, 'callback': 'c'}
                 r = httptools.downloadpage(
                     'http://sh.st/shortest-url/end-adsession?' +
@@ -361,7 +300,6 @@ class UnshortenIt(object):
                     headers=http_header,
                     timeout=self._timeout)
                 response = r.data[6:-2].decode('utf-8')
-
                 if r.code == 200:
                     resp_uri = json.loads(response)['destinationUrl']
                     if resp_uri is not None:
@@ -401,12 +339,9 @@ class UnshortenIt(object):
         try:
             r = httptools.downloadpage(uri, timeout=self._timeout, cookies=False)
             html = r.data
-
             uri = re.findall(r"", html)[0]
-
             from core import scrapertools
             uri = scrapertools.decodeHtmlentities(uri)
-
             uri = uri.replace("&#47;", "/") \
                 .replace("&#58;", ":") \
                 .replace("&#46;", ".") \
@@ -414,7 +349,6 @@ class UnshortenIt(object):
                 .replace("&#35;", "#") \
                 .replace("&#63;", "?") \
                 .replace("&#95;", "_")
-
             return uri, r.code
 
         except Exception as e:
@@ -424,9 +358,7 @@ class UnshortenIt(object):
         try:
             r = httptools.downloadpage(uri, timeout=self._timeout, cookies=False)
             html = r.data
-
             uri = re.findall(r'Click to continue', html)[0]
-
             return uri, r.code
 
         except Exception as e:
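
Reviewer note (illustrative, not part of the patch): the diff above only strips blank lines, so the decoding logic it touches is unchanged but easier to audit as standalone code. The two sketches below restate, in plain Python, the ysmm decoding from _unshorten_adfly and the outbound-link cleanup from _clear_google_outbound_proxy. The helper names (decode_ysmm, clear_google_outbound) and the Python 3 urllib.parse import are assumptions for the sketch only; the plugin itself runs inside Kodi with its own httptools/scrapertools helpers.

    # Sketch of the adf.ly 'ysmm' decoding, assuming the input is the value
    # of the "var ysmm = '...';" assignment scraped from the adf.ly page.
    from base64 import b64decode

    def decode_ysmm(ysmm):
        # Split into 2-char pairs: first chars rebuild the left half in
        # order, second chars rebuild the right half in reverse.
        left, right = '', ''
        for pair in [ysmm[i:i + 2] for i in range(0, len(ysmm), 2)]:
            left += pair[0]
            right = pair[1] + right

        # XOR consecutive pairs of digits; a single-digit result overwrites
        # the first digit of each pair (the "additional digit arithmetic").
        encoded = list(left + right)
        digits = ((i, ch) for i, ch in enumerate(encoded) if ch.isdigit())
        for first, second in zip(digits, digits):
            xor = int(first[1]) ^ int(second[1])
            if xor < 10:
                encoded[first[0]] = str(xor)

        # What remains is base64 with 16 bytes of junk on each end.
        return b64decode("".join(encoded).encode())[16:-16].decode()

A decoded result may still carry a go.php?u= wrapper, which the method unwraps with a second base64 pass, as shown in the @@ -178 hunk.

    # Sketch of the google outbound-proxy cleanup. The original targets
    # Python 2 (urlparse module); urllib.parse here is an assumption.
    from urllib.parse import urlparse, parse_qs

    def clear_google_outbound(url):
        # Google wraps outbound links as /url?q=... (docs) or /url?url=... (search).
        if url.startswith(("http://www.google.com/url?",
                           "https://www.google.com/url?")):
            query = parse_qs(urlparse(url).query)
            if "q" in query:
                return True, query["q"].pop()
            if "url" in query:
                return True, query["url"].pop()
            raise ValueError("Google outbound proxy URL without a target url ('%s')?" % url)
        return False, url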