recaptcha librerie

2021-11-21 13:34:57 +01:00
parent 264b2d4292
commit 377044a879
32 changed files with 9845 additions and 0 deletions
@@ -0,0 +1,113 @@
+# Copyright (C) 2017, 2019, 2021 taylor.fish <contact@taylor.fish>
+#
+# This file is part of librecaptcha.
+#
+# librecaptcha is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# librecaptcha is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with librecaptcha.  If not, see <https://www.gnu.org/licenses/>.
+
+from .typing import List
+import requests
+
+import json
+import os
+import os.path
+import re
+import sys
+
+SHOW_WARNINGS = False
+
+
+def load_javascript(url: str, user_agent: str) -> str:
+    print("Downloading <{}>...".format(url), file=sys.stderr)
+    r = requests.get(url, headers={
+        "User-Agent": user_agent,
+    })
+    return r.text
+
+
+def extract_strings_slimit(javascript: str) -> List[str]:
+    from slimit.parser import Parser
+    from slimit import ast
+
+    if SHOW_WARNINGS:
+        parser = Parser()
+    else:
+        # File descriptor hackiness to silence warnings
+        null_fd = os.open(os.devnull, os.O_RDWR)
+        old_fd = os.dup(2)
+        try:
+            os.dup2(null_fd, 2)
+            parser = Parser()
+        finally:
+            os.dup2(old_fd, 2)
+            os.close(null_fd)
+            os.close(old_fd)
+
+    # Hack to work around https://github.com/rspivak/slimit/issues/52
+    KEYWORDS = r"(?:catch|delete|return|throw)"
+    javascript = re.sub(rf"(\.\s*{KEYWORDS})\b", r"\1_", javascript)
+    javascript = re.sub(rf"\b({KEYWORDS})(\s*:)", r"'\1'\2", javascript)
+    parsed = parser.parse(javascript)
+    strings = []
+
+    def add_strings(tree, strings):
+        if tree is None:
+            return
+        if not isinstance(tree, (ast.Node, list, tuple)):
+            raise TypeError("Unexpected item: {!r}".format(tree))
+        if isinstance(tree, ast.String):
+            strings.append(tree.value[1:-1])
+
+        children = tree
+        if isinstance(tree, ast.Node):
+            children = tree.children()
+        for child in children:
+            add_strings(child, strings)
+
+    add_strings(parsed, strings)
+    return strings
+
+
+def extract_strings(javascript: str) -> List[str]:
+    print("Extracting strings...", file=sys.stderr)
+    try:
+        import esprima
+    except ImportError:
+        return extract_strings_slimit(javascript)
+
+    strings = []
+
+    def handle_node(node, *args):
+        if node.type == "Literal" and isinstance(node.value, str):
+            strings.append(node.value)
+
+    esprima.parseScript(javascript, delegate=handle_node)
+    return strings
+
+
+def extract_and_save(
+    url: str,
+    path: str,
+    version: str,
+    rc_version: str,
+    user_agent: str,
+) -> List[str]:
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    with open(path, "w") as f:
+        print("{}/{}".format(version, rc_version), file=f)
+        js = load_javascript(url, user_agent)
+        strings = extract_strings(js)
+        strings_json = json.dumps(strings)
+        print('Saving strings to "{}"...'.format(path), file=sys.stderr)
+        f.write(strings_json)
+        return strings