folder reorganization

cttynul
2019-04-23 14:32:53 +02:00
parent 659751b2f4
commit 8e7ee78a87
1195 changed files with 267003 additions and 2 deletions

View File

@@ -0,0 +1 @@
# -*- coding: utf-8 -*-

View File

@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
from base64 import b64decode
from core import httptools
from core import scrapertools
from platformcode import logger
def get_long_url(short_url):
    # Decodes the interleaved "ysmm" variable that adf.ly embeds in its pages:
    # even-indexed characters are collected front-to-back, odd-indexed ones
    # back-to-front, yielding a base64 string whose first two decoded bytes
    # are padding.
    logger.info("short_url = '%s'" % short_url)
    data = httptools.downloadpage(short_url).data
    ysmm = scrapertools.find_single_match(data, "var ysmm = '([^']+)';")
    b64 = ""
    for i in reversed(range(len(ysmm))):
        if i % 2:
            b64 = b64 + ysmm[i]
        else:
            b64 = ysmm[i] + b64
    decoded_uri = b64decode(b64)[2:]
    # adf.ly sometimes chains through an intermediate redirect page
    if "adf.ly/redirecting" in decoded_uri:
        data = httptools.downloadpage(decoded_uri).data
        decoded_uri = scrapertools.find_single_match(data, "window.location = '([^']+)'")
    return decoded_uri
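
For reference, a standalone sketch of the same ysmm de-interleaving, paired with a hand-built inverse so the round trip can be checked without hitting adf.ly (the sample URL and the encode_ysmm helper are illustrative assumptions, not part of the addon):

# -*- coding: utf-8 -*-
from base64 import b64decode, b64encode

def decode_ysmm(ysmm):
    # same loop as above: even indices front-to-back, odd indices back-to-front
    b64 = ""
    for i in reversed(range(len(ysmm))):
        if i % 2:
            b64 = b64 + ysmm[i]
        else:
            b64 = ysmm[i] + b64
    return b64decode(b64)[2:]

def encode_ysmm(url):
    # hypothetical inverse transform, only for exercising the decoder
    b64 = b64encode("XX" + url)  # two padding bytes, stripped by [2:] above
    half = (len(b64) + 1) // 2
    evens, odds = b64[:half], b64[half:][::-1]
    return "".join(evens[j // 2] if j % 2 == 0 else odds[j // 2]
                   for j in range(len(b64)))

print(decode_ysmm(encode_ysmm("http://example.com/video")))  # http://example.com/video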

View File

@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
from lib import unshortenit
def expand_url(url):
    e = unshortenit.UnshortenIt()
    estado = 200
    # keep resolving until unshorten() returns status 0 (nothing more to expand)
    while estado != 0:
        long_url, estado = e.unshorten(url)
        url = long_url
    return long_url
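
A minimal usage sketch, assuming this commit's layout puts the module at servers/decrypters/expurl.py and that the bundled unshortenit reports status 0 once no known shortener matches (the short link below is hypothetical):

from servers.decrypters import expurl

long_url = expurl.expand_url("http://sh.st/abcde")  # hypothetical short link
print(long_url)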

View File

@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
import urllib
from core import scrapertools
from platformcode import logger
# Gets the URL behind a linkbucks link
def get_long_url(short_url):
    logger.info("(short_url='%s')" % short_url)
    request_headers = []
    request_headers.append(["User-Agent",
                            "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12"])
    request_headers.append(["Referer", "http://linkdecrypter.com"])
    post = urllib.urlencode({"pro_links": short_url, "modo_links": "text", "modo_recursivo": "on", "link_cache": "on"})
    url = "http://linkdecrypter.com/"
    # Workaround: Python does not seem to handle the PHPSESSID header properly,
    # so the session cookie is extracted and re-sent by hand below
    body, response_headers = scrapertools.read_body_and_headers(url, post=post, headers=request_headers)
    location = ""
    n = 1
    while True:
        # carry over any session cookie set by the previous response
        for name, value in response_headers:
            if name == "set-cookie":
                logger.info("Set-Cookie: " + value)
                cookie_name = scrapertools.find_single_match(value, '(.*?)\=.*?\;')
                cookie_value = scrapertools.find_single_match(value, '.*?\=(.*?)\;')
                request_headers.append(["Cookie", cookie_name + "=" + cookie_value])
        body, response_headers = scrapertools.read_body_and_headers(url, headers=request_headers)
        logger.info("body=" + body)
        try:
            location = scrapertools.find_single_match(body, '<textarea.*?class="caja_des">([^<]+)</textarea>')
            logger.info("location=" + location)
            break
        except:
            # retry up to three times before giving up
            n = n + 1
            if n > 3:
                break
    return location
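
The Set-Cookie parsing above can be checked in isolation; this sketch swaps in a minimal stand-in for scrapertools.find_single_match and a fabricated header value:

import re

def find_single_match(data, patron):
    # minimal stand-in for scrapertools.find_single_match
    match = re.search(patron, data, re.DOTALL)
    return match.group(1) if match else ""

value = "PHPSESSID=abc123; path=/"  # fabricated Set-Cookie value
cookie_name = find_single_match(value, '(.*?)\=.*?\;')
cookie_value = find_single_match(value, '.*?\=(.*?)\;')
print(cookie_name + "=" + cookie_value)  # PHPSESSID=abc123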

View File

@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
import re
import urllib
from core import httptools
from core import scrapertools
from platformcode import logger
def get_server_list():
    servers = []
    data = httptools.downloadpage("http://longurl.org/services").data
    data = scrapertools.unescape(data)
    data = scrapertools.find_single_match(data, '<ol>(.*?)</ol>')
    patron = '<li>(.*?)</li>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    # manually add some services that are missing from the website
    servers.append("sh.st")
    for server in matches:
        servers.append(server)
    return servers

servers = get_server_list()

def get_long_urls(data):
    logger.info()
    patron = '<a href="http://([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for short_url in matches:
        if short_url.startswith(tuple(servers)):
            logger.info(": " + short_url)
            longurl_data = httptools.downloadpage(
                "http://api.longurl.org/v2/expand?url=" + urllib.quote_plus(short_url)).data
            logger.info(longurl_data)
            try:
                long_url = scrapertools.find_single_match(longurl_data, '<long-url><!\[CDATA\[(.*?)\]\]></long-url>')
            except:
                long_url = ""
            if long_url != "":
                data = data.replace(short_url, long_url)
    return data
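
A self-contained sketch of the <long-url> extraction, using a canned API response in the shape the code above expects instead of a live call to api.longurl.org:

import re

longurl_data = "<response><long-url><![CDATA[http://example.com/page]]></long-url></response>"
match = re.search(r'<long-url><!\[CDATA\[(.*?)\]\]></long-url>', longurl_data)
print(match.group(1) if match else "")  # http://example.com/page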

View File

@@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
from core import scrapertools
from platformcode import logger
def get_long_url(short_url):
    logger.info("(short_url='%s')" % short_url)
    location = scrapertools.get_header_from_response(short_url, header_to_get="location")
    logger.info("location=" + location)
    return location
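
For comparison, a rough standard-library equivalent of the header lookup above, assuming a plain http:// short link (scrapertools.get_header_from_response is presumed to do something similar without following the redirect):

import httplib
import urlparse

def get_location(short_url):
    # issue a HEAD request and read the Location header without following it
    parts = urlparse.urlsplit(short_url)
    conn = httplib.HTTPConnection(parts.netloc)
    conn.request("HEAD", parts.path or "/")
    return conn.getresponse().getheader("location", "")

print(get_location("http://bit.ly/example"))  # hypothetical short link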

View File

@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
# Thanks to errmax and dr-z3r0
import re
from core import httptools, scrapertools, scrapertoolsV2
from servers.decrypters import expurl
from platformcode import logger
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    # URLs already seen, pre-seeded with assets that match the patterns
    # but are not links worth resolving
    encontrados = {
        'https://vcrypt.net/images/logo', 'https://vcrypt.net/css/out',
        'https://vcrypt.net/images/favicon', 'https://vcrypt.net/css/open',
        'http://linkup.pro/js/jquery', 'https://linkup.pro/js/jquery',
        'http://www.rapidcrypt.net/open'
    }
    devuelve = []
    patronvideos = [
        r'(https?://(gestyy|rapidteria|sprysphere)\.com/[a-zA-Z0-9]+)',
        r'(https?://(?:www\.)?(vcrypt|linkup)\.[^/]+/[^/]+/[a-zA-Z0-9_]+)'
    ]
    for patron in patronvideos:
        logger.info(" find_videos #" + patron + "#")
        matches = re.compile(patron).findall(page_url)
        for url, host in matches:
            if url not in encontrados:
                logger.info(" url=" + url)
                encontrados.add(url)
                if host == 'gestyy':
                    # gestyy answers a curl User-Agent with a plain Location header
                    resp = httptools.downloadpage(
                        url,
                        follow_redirects=False,
                        cookies=False,
                        only_headers=True,
                        replace_headers=True,
                        headers={'User-Agent': 'curl/7.59.0'})
                    data = resp.headers.get("location", "")
                elif 'vcrypt.net' in url:
                    from lib import unshortenit
                    data, status = unshortenit.unshorten(url)
                elif 'linkup' in url:
                    idata = httptools.downloadpage(url).data
                    data = scrapertoolsV2.find_single_match(idata, "<iframe[^<>]*src=\\'([^'>]*)\\'[^<>]*>")
                else:
                    data = ""
                    # follow redirects by hand until we leave the shortener's host
                    while host in url:
                        resp = httptools.downloadpage(
                            url, follow_redirects=False)
                        url = resp.headers.get("location", "")
                        if not url:
                            data = resp.data
                        elif host not in url:
                            data = url
                if data:
                    devuelve.append(data)
            else:
                logger.info(" url duplicada=" + url)
    patron = r"""(https?://(?:www\.)?(?:threadsphere\.bid|adf\.ly|q\.gs|j\.gs|u\.bb|ay\.gy|linkbucks\.com|any\.gs|cash4links\.co|cash4files\.co|dyo\.gs|filesonthe\.net|goneviral\.com|megaline\.co|miniurls\.co|qqc\.co|seriousdeals\.net|theseblogs\.com|theseforums\.com|tinylinks\.co|tubeviral\.com|ultrafiles\.net|urlbeat\.net|whackyvidz\.com|yyv\.co|adfoc\.us|lnx\.lu|sh\.st|href\.li|anonymz\.com|shrink-service\.it|rapidcrypt\.net)/[^"']+)"""
    logger.info(" find_videos #" + patron + "#")
    matches = re.compile(patron).findall(page_url)
    for url in matches:
        if url not in encontrados:
            logger.info(" url=" + url)
            encontrados.add(url)
            long_url = expurl.expand_url(url)
            if long_url:
                devuelve.append(long_url)
        else:
            logger.info(" url duplicada=" + url)
    ret = page_url + " " + str(devuelve) if devuelve else page_url
    logger.info(" RET=" + str(ret))
    return ret
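
A usage sketch for the resolver above, run from within this module; the input text and the shape of the result are fabricated examples:

page_url = "watch here: https://vcrypt.net/fastshield/abc123"  # hypothetical page text
print(get_video_url(page_url))
# with one resolvable link, the return value is the original text plus the
# list of expanded URLs, e.g. "watch here: ... ['https://some-host/video']"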