From 3c7a2cef98be62f3930480b42d96972b25fcd9b8 Mon Sep 17 00:00:00 2001
From: Intel1 <25161862+Intel11@users.noreply.github.com>
Date: Tue, 15 May 2018 11:11:56 -0500
Subject: [PATCH] Update unshortenit.py

---
 plugin.video.alfa/lib/unshortenit.py | 68 ----------------------------
 1 file changed, 68 deletions(-)

diff --git a/plugin.video.alfa/lib/unshortenit.py b/plugin.video.alfa/lib/unshortenit.py
index ee953a06..07141af1 100755
--- a/plugin.video.alfa/lib/unshortenit.py
+++ b/plugin.video.alfa/lib/unshortenit.py
@@ -37,21 +37,15 @@ class UnshortenIt(object):
     _anonymz_regex = r'anonymz\.com'
     _shrink_service_regex = r'shrink-service\.it'
     _rapidcrypt_regex = r'rapidcrypt\.net'
-
     _maxretries = 5
-
     _this_dir, _this_filename = os.path.split(__file__)
     _timeout = 10

     def unshorten(self, uri, type=None):
-
         domain = urlsplit(uri).netloc
-
         if not domain: return uri, "No domain found in URI!"
-
         had_google_outbound, uri = self._clear_google_outbound_proxy(uri)
-
         if re.search(self._adfly_regex, domain, re.IGNORECASE) or type == 'adfly':
             return self._unshorten_adfly(uri)
@@ -74,21 +68,15 @@ class UnshortenIt(object):
             return self._unshorten_anonymz(uri)
         if re.search(self._rapidcrypt_regex, domain, re.IGNORECASE):
             return self._unshorten_rapidcrypt(uri)
-
         return uri, 200
-
     def unwrap_30x(self, uri, timeout=10):
-
         domain = urlsplit(uri).netloc
         self._timeout = timeout
-
         loop_counter = 0
         try:
-
             if loop_counter > 5:
                 raise ValueError("Infinitely looping redirect from URL: '%s'" % (uri,))
-
             # headers stop t.co from working so omit headers if this is a t.co link
             if domain == 't.co':
                 r = httptools.downloadpage(uri, timeout=self._timeout)
@@ -108,7 +96,6 @@
                     only_headers=True)
                 if not r.success:
                     return uri, -1
-
                 retries = 0
                 if 'location' in r.headers and retries < self._maxretries:
                     r = httptools.downloadpage(
@@ -120,10 +107,8 @@
                     retries = retries + 1
                 else:
                     return r.url, r.code
-
         except Exception as e:
             return uri, str(e)
-
     def _clear_google_outbound_proxy(self, url):
         '''
         So google proxies all their outbound links through a redirect so they can detect outbound links.
@@ -132,16 +117,13 @@
         This is useful for doing things like parsing google search results, or if you're scraping google docs, where google inserts hit-counters on all outbound links.
         '''
-
         # This is kind of hacky, because we need to check both the netloc AND
         # part of the path. We could use urllib.parse.urlsplit, but it's
         # easier and just as effective to use string checks.
         if url.startswith("http://www.google.com/url?") or \
            url.startswith("https://www.google.com/url?"):
-
             qs = urlparse(url).query
             query = parse_qs(qs)
-
             if "q" in query:    # Google doc outbound links (maybe blogspot, too)
                 return True, query["q"].pop()
             elif "url" in query:    # Outbound links from google searches
@@ -150,7 +132,6 @@
                 return True, query["url"].pop()
             else:
                 raise ValueError(
                     "Google outbound proxy URL without a target url ('%s')?"
                     % url)
-
         return False, url

     def _unshorten_adfly(self, uri):
@@ -163,14 +144,11 @@
         if len(ysmm) > 0:
             ysmm = re.sub(r'var ysmm \= \'|\'\;', '', ysmm[0])
-
             left = ''
             right = ''
-
             for c in [ysmm[i:i + 2] for i in range(0, len(ysmm), 2)]:
                 left += c[0]
                 right = c[1] + right
-
             # Additional digit arithmetic
             encoded_uri = list(left + right)
             numbers = ((i, n) for i, n in enumerate(encoded_uri) if str.isdigit(n))
@@ -178,12 +156,9 @@
                 xor = int(first[1]) ^ int(second[1])
                 if xor < 10:
                     encoded_uri[first[0]] = str(xor)
-
             decoded_uri = b64decode("".join(encoded_uri).encode())[16:-16].decode()
-
             if re.search(r'go\.php\?u\=', decoded_uri):
                 decoded_uri = b64decode(re.sub(r'(.*?)u=', '', decoded_uri)).decode()
-
             return decoded_uri, r.code
         else:
             return uri, 'No ysmm variable found'
@@ -195,23 +170,15 @@
         '''
         (Attempt) to decode linkbucks content.
         HEAVILY based on the OSS jDownloader codebase. This has necessidated a license change.
-
         '''
-
         r = httptools.downloadpage(uri, timeout=self._timeout)
-
         firstGet = time.time()
-
         baseloc = r.url
-
         if "/notfound/" in r.url or \
                 "(>Link Not Found<|>The link may have been deleted by the owner|To access the content, you must complete a quick survey\.)" in r.data:
             return uri, 'Error: Link not found or requires a survey!'
-
         link = None
-
         content = r.data
-
         regexes = [
             r"
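
For reference, the query-string unwrap performed in the _clear_google_outbound_proxy() hunks above can be
exercised standalone roughly like this. Illustration only: the example URL, the import fallback, and the
variable names are assumptions, not the plugin's exact code.

    # Unwrap a Google outbound-proxy URL the same way the context lines above do.
    try:
        from urllib.parse import urlparse, parse_qs   # Python 3
    except ImportError:
        from urlparse import urlparse, parse_qs       # Python 2 (Kodi-era)

    wrapped = "https://www.google.com/url?url=https%3A%2F%2Fexample.com%2Fpage&sa=t"  # made-up example
    query = parse_qs(urlparse(wrapped).query)
    if "q" in query:        # Google doc outbound links
        target = query["q"].pop()
    elif "url" in query:    # outbound links from google searches
        target = query["url"].pop()
    else:
        target = wrapped
    print(target)           # -> https://example.com/page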
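
Similarly, the adf.ly decoding that the _unshorten_adfly() hunks walk through (split the ysmm string into
interleaved halves, XOR neighbouring digits, base64-decode and strip 16 bytes from each end) can be
sketched as a standalone function. The function name is hypothetical and the digit-pairing zip() loop is
an assumption, since that line falls outside the hunk context shown above.

    import re
    from base64 import b64decode

    def decode_ysmm(ysmm):
        # De-interleave: even-position chars build the left half in order,
        # odd-position chars build the right half in reverse (assumes even length).
        left = ''
        right = ''
        for pair in [ysmm[i:i + 2] for i in range(0, len(ysmm), 2)]:
            left += pair[0]
            right = pair[1] + right

        # XOR consecutive digit pairs in place when the result stays a single digit.
        encoded_uri = list(left + right)
        numbers = ((i, n) for i, n in enumerate(encoded_uri) if n.isdigit())
        for first, second in zip(numbers, numbers):   # assumed pairing, not shown in the hunk
            xor = int(first[1]) ^ int(second[1])
            if xor < 10:
                encoded_uri[first[0]] = str(xor)

        # Base64-decode and drop 16 bytes of junk from each end.
        decoded = b64decode("".join(encoded_uri).encode())[16:-16].decode()

        # Some links wrap the real target once more behind go.php?u=<base64>.
        if re.search(r'go\.php\?u\=', decoded):
            decoded = b64decode(re.sub(r'(.*?)u=', '', decoded)).decode()
        return decoded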