folder reorganization

This commit is contained in:
cttynul
2019-04-23 14:32:53 +02:00
parent 659751b2f4
commit 8e7ee78a87
1195 changed files with 267003 additions and 2 deletions
+38
View File
@@ -0,0 +1,38 @@
# The MIT License
#
# Copyright 2014, 2015 Piotr Dabkowski
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
__all__ = [
'PyJsParser', 'Node', 'WrappingNode', 'node_to_dict', 'parse',
'translate_js', 'translate', 'syntax_tree_translate', 'DEFAULT_HEADER'
]
__author__ = 'Piotr Dabkowski'
__version__ = '2.2.0'
from pyjsparser import PyJsParser
from .translator import translate_js, trasnlate, syntax_tree_translate, DEFAULT_HEADER
def parse(javascript_code):
"""Returns syntax tree of javascript_code.
Syntax tree has the same structure as syntax tree produced by esprima.js
Same as PyJsParser().parse For your convenience :) """
p = PyJsParser()
return p.parse(javascript_code)
+375
View File
@@ -0,0 +1,375 @@
import binascii
from pyjsparser import PyJsParser
import six
if six.PY3:
basestring = str
long = int
xrange = range
unicode = str
REGEXP_CONVERTER = PyJsParser()
def to_hex(s):
return binascii.hexlify(s.encode('utf8')).decode(
'utf8') # fucking python 3, I hate it so much
# wtf was wrong with s.encode('hex') ???
def indent(lines, ind=4):
return ind * ' ' + lines.replace('\n', '\n' + ind * ' ').rstrip(' ')
def inject_before_lval(source, lval, code):
if source.count(lval) > 1:
print()
print(lval)
raise RuntimeError('To many lvals (%s)' % lval)
elif not source.count(lval):
print()
print(lval)
assert lval not in source
raise RuntimeError('No lval found "%s"' % lval)
end = source.index(lval)
inj = source.rfind('\n', 0, end)
ind = inj
while source[ind + 1] == ' ':
ind += 1
ind -= inj
return source[:inj + 1] + indent(code, ind) + source[inj + 1:]
def get_continue_label(label):
return CONTINUE_LABEL % to_hex(label)
def get_break_label(label):
return BREAK_LABEL % to_hex(label)
def is_valid_py_name(name):
try:
compile(name + ' = 11', 'a', 'exec')
except:
return False
return True
def indent(lines, ind=4):
return ind * ' ' + lines.replace('\n', '\n' + ind * ' ').rstrip(' ')
def compose_regex(val):
reg, flags = val
#reg = REGEXP_CONVERTER._unescape_string(reg)
return u'/%s/%s' % (reg, flags)
def float_repr(f):
if int(f) == f:
return repr(int(f))
return repr(f)
def argsplit(args, sep=','):
"""used to split JS args (it is not that simple as it seems because
sep can be inside brackets).
pass args *without* brackets!
Used also to parse array and object elements, and more"""
parsed_len = 0
last = 0
splits = []
for e in bracket_split(args, brackets=['()', '[]', '{}']):
if e[0] not in ('(', '[', '{'):
for i, char in enumerate(e):
if char == sep:
splits.append(args[last:parsed_len + i])
last = parsed_len + i + 1
parsed_len += len(e)
splits.append(args[last:])
return splits
def bracket_split(source, brackets=('()', '{}', '[]'), strip=False):
"""DOES NOT RETURN EMPTY STRINGS (can only return empty bracket content if strip=True)"""
starts = [e[0] for e in brackets]
in_bracket = 0
n = 0
last = 0
while n < len(source):
e = source[n]
if not in_bracket and e in starts:
in_bracket = 1
start = n
b_start, b_end = brackets[starts.index(e)]
elif in_bracket:
if e == b_start:
in_bracket += 1
elif e == b_end:
in_bracket -= 1
if not in_bracket:
if source[last:start]:
yield source[last:start]
last = n + 1
yield source[start + strip:n + 1 - strip]
n += 1
if source[last:]:
yield source[last:]
def js_comma(a, b):
return 'PyJsComma(' + a + ',' + b + ')'
def js_or(a, b):
return '(' + a + ' or ' + b + ')'
def js_bor(a, b):
return '(' + a + '|' + b + ')'
def js_bxor(a, b):
return '(' + a + '^' + b + ')'
def js_band(a, b):
return '(' + a + '&' + b + ')'
def js_and(a, b):
return '(' + a + ' and ' + b + ')'
def js_strict_eq(a, b):
return 'PyJsStrictEq(' + a + ',' + b + ')'
def js_strict_neq(a, b):
return 'PyJsStrictNeq(' + a + ',' + b + ')'
#Not handled by python in the same way like JS. For example 2==2==True returns false.
# In JS above would return true so we need brackets.
def js_abstract_eq(a, b):
return '(' + a + '==' + b + ')'
#just like ==
def js_abstract_neq(a, b):
return '(' + a + '!=' + b + ')'
def js_lt(a, b):
return '(' + a + '<' + b + ')'
def js_le(a, b):
return '(' + a + '<=' + b + ')'
def js_ge(a, b):
return '(' + a + '>=' + b + ')'
def js_gt(a, b):
return '(' + a + '>' + b + ')'
def js_in(a, b):
return b + '.contains(' + a + ')'
def js_instanceof(a, b):
return a + '.instanceof(' + b + ')'
def js_lshift(a, b):
return '(' + a + '<<' + b + ')'
def js_rshift(a, b):
return '(' + a + '>>' + b + ')'
def js_shit(a, b):
return 'PyJsBshift(' + a + ',' + b + ')'
def js_add(
a,
b): # To simplify later process of converting unary operators + and ++
return '(%s+%s)' % (a, b)
def js_sub(a, b): # To simplify
return '(%s-%s)' % (a, b)
def js_mul(a, b):
return '(' + a + '*' + b + ')'
def js_div(a, b):
return '(' + a + '/' + b + ')'
def js_mod(a, b):
return '(' + a + '%' + b + ')'
def js_typeof(a):
cand = list(bracket_split(a, ('()', )))
if len(cand) == 2 and cand[0] == 'var.get':
return cand[0] + cand[1][:-1] + ',throw=False).typeof()'
return a + '.typeof()'
def js_void(a):
# eval and return undefined
return 'PyJsComma(%s, Js(None))' % a
def js_new(a):
cands = list(bracket_split(a, ('()', )))
lim = len(cands)
if lim < 2:
return a + '.create()'
n = 0
while n < lim:
c = cands[n]
if c[0] == '(':
if cands[n - 1].endswith(
'.get') and n + 1 >= lim: # last get operation.
return a + '.create()'
elif cands[n - 1][0] == '(':
return ''.join(cands[:n]) + '.create' + c + ''.join(
cands[n + 1:])
elif cands[n - 1] == '.callprop':
beg = ''.join(cands[:n - 1])
args = argsplit(c[1:-1], ',')
prop = args[0]
new_args = ','.join(args[1:])
create = '.get(%s).create(%s)' % (prop, new_args)
return beg + create + ''.join(cands[n + 1:])
n += 1
return a + '.create()'
def js_delete(a):
#replace last get with delete.
c = list(bracket_split(a, ['()']))
beg, arglist = ''.join(c[:-1]).strip(), c[-1].strip(
) #strips just to make sure... I will remove it later
if beg[-4:] != '.get':
print(a)
raise SyntaxError('Invalid delete operation')
return beg[:-3] + 'delete' + arglist
def js_neg(a):
return '(-' + a + ')'
def js_pos(a):
return '(+' + a + ')'
def js_inv(a):
return '(~' + a + ')'
def js_not(a):
return a + '.neg()'
def js_postfix(a, inc, post):
bra = list(bracket_split(a, ('()', )))
meth = bra[-2]
if not meth.endswith('get'):
raise SyntaxError('Invalid ++ or -- operation.')
bra[-2] = bra[-2][:-3] + 'put'
bra[-1] = '(%s,Js(%s.to_number())%sJs(1))' % (bra[-1][1:-1], a,
'+' if inc else '-')
res = ''.join(bra)
return res if not post else '(%s%sJs(1))' % (res, '-' if inc else '+')
def js_pre_inc(a):
return js_postfix(a, True, False)
def js_post_inc(a):
return js_postfix(a, True, True)
def js_pre_dec(a):
return js_postfix(a, False, False)
def js_post_dec(a):
return js_postfix(a, False, True)
CONTINUE_LABEL = 'JS_CONTINUE_LABEL_%s'
BREAK_LABEL = 'JS_BREAK_LABEL_%s'
PREPARE = '''HOLDER = var.own.get(NAME)\nvar.force_own_put(NAME, PyExceptionToJs(PyJsTempException))\n'''
RESTORE = '''if HOLDER is not None:\n var.own[NAME] = HOLDER\nelse:\n del var.own[NAME]\ndel HOLDER\n'''
TRY_CATCH = '''%stry:\nBLOCKfinally:\n%s''' % (PREPARE, indent(RESTORE))
OR = {'||': js_or}
AND = {'&&': js_and}
BOR = {'|': js_bor}
BXOR = {'^': js_bxor}
BAND = {'&': js_band}
EQS = {
'===': js_strict_eq,
'!==': js_strict_neq,
'==': js_abstract_eq, # we need == and != too. Read a note above method
'!=': js_abstract_neq
}
#Since JS does not have chained comparisons we need to implement all cmp methods.
COMPS = {
'<': js_lt,
'<=': js_le,
'>=': js_ge,
'>': js_gt,
'instanceof': js_instanceof, #todo change to validitate
'in': js_in
}
BSHIFTS = {'<<': js_lshift, '>>': js_rshift, '>>>': js_shit}
ADDS = {'+': js_add, '-': js_sub}
MULTS = {'*': js_mul, '/': js_div, '%': js_mod}
BINARY = {}
BINARY.update(ADDS)
BINARY.update(MULTS)
BINARY.update(BSHIFTS)
BINARY.update(COMPS)
BINARY.update(EQS)
BINARY.update(BAND)
BINARY.update(BXOR)
BINARY.update(BOR)
BINARY.update(AND)
BINARY.update(OR)
#Note they dont contain ++ and -- methods because they both have 2 different methods
# correct method will be found automatically in translate function
UNARY = {
'typeof': js_typeof,
'void': js_void,
'new': js_new,
'delete': js_delete,
'!': js_not,
'-': js_neg,
'+': js_pos,
'~': js_inv,
'++': None,
'--': None
}
+211
View File
@@ -0,0 +1,211 @@
from pyjsparser.pyjsparserdata import *
REGEXP_SPECIAL_SINGLE = {'\\', '^', '$', '*', '+', '?', '.'}
NOT_PATTERN_CHARS = {
'^', '$', '\\', '.', '*', '+', '?', '(', ')', '[', ']', '|'
} # what about '{', '}', ???
CHAR_CLASS_ESCAPE = {'d', 'D', 's', 'S', 'w', 'W'}
CONTROL_ESCAPE_CHARS = {'f', 'n', 'r', 't', 'v'}
CONTROL_LETTERS = {
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D',
'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
}
def SpecialChar(char):
return {'type': 'SpecialChar', 'content': char}
def isPatternCharacter(char):
return char not in NOT_PATTERN_CHARS
class JsRegExpParser:
def __init__(self, source, flags):
self.source = source
self.flags = flags
self.index = 0
self.length = len(source)
self.lineNumber = 0
self.lineStart = 0
def parsePattern(self):
'''Perform sctring escape - for regexp literals'''
return {'type': 'Pattern', 'contents': self.parseDisjunction()}
def parseDisjunction(self):
alternatives = []
while True:
alternatives.append(self.parseAlternative())
if not self.isEOF():
self.expect_character('|')
else:
break
return {'type': 'Disjunction', 'contents': alternatives}
def isEOF(self):
if self.index >= self.length:
return True
return False
def expect_character(self, character):
if self.source[self.index] != character:
self.throwUnexpected(character)
self.index += 1
def parseAlternative(self):
contents = []
while not self.isEOF() and self.source[self.index] != '|':
contents.append(self.parseTerm())
return {'type': 'Alternative', 'contents': contents}
def follows(self, chars):
for i, c in enumerate(chars):
if self.index + i >= self.length or self.source[self.index +
i] != c:
return False
return True
def parseTerm(self):
assertion = self.parseAssertion()
if assertion:
return assertion
else:
return {
'type': 'Term',
'contents': self.parseAtom()
} # quantifier will go inside atom!
def parseAssertion(self):
if self.follows('$'):
content = SpecialChar('$')
self.index += 1
elif self.follows('^'):
content = SpecialChar('^')
self.index += 1
elif self.follows('\\b'):
content = SpecialChar('\\b')
self.index += 2
elif self.follows('\\B'):
content = SpecialChar('\\B')
self.index += 2
elif self.follows('(?='):
self.index += 3
dis = self.parseDisjunction()
self.expect_character(')')
content = {'type': 'Lookached', 'contents': dis, 'negated': False}
elif self.follows('(?!'):
self.index += 3
dis = self.parseDisjunction()
self.expect_character(')')
content = {'type': 'Lookached', 'contents': dis, 'negated': True}
else:
return None
return {'type': 'Assertion', 'content': content}
def parseAtom(self):
if self.follows('.'):
content = SpecialChar('.')
self.index += 1
elif self.follows('\\'):
self.index += 1
content = self.parseAtomEscape()
elif self.follows('['):
content = self.parseCharacterClass()
elif self.follows('(?:'):
self.index += 3
dis = self.parseDisjunction()
self.expect_character(')')
content = 'idk'
elif self.follows('('):
self.index += 1
dis = self.parseDisjunction()
self.expect_character(')')
content = 'idk'
elif isPatternCharacter(self.source[self.index]):
content = self.source[self.index]
self.index += 1
else:
return None
quantifier = self.parseQuantifier()
return {'type': 'Atom', 'content': content, 'quantifier': quantifier}
def parseQuantifier(self):
prefix = self.parseQuantifierPrefix()
if not prefix:
return None
greedy = True
if self.follows('?'):
self.index += 1
greedy = False
return {'type': 'Quantifier', 'contents': prefix, 'greedy': greedy}
def parseQuantifierPrefix(self):
if self.isEOF():
return None
if self.follows('+'):
content = '+'
self.index += 1
elif self.follows('?'):
content = '?'
self.index += 1
elif self.follows('*'):
content = '*'
self.index += 1
elif self.follows(
'{'
): # try matching otherwise return None and restore the state
i = self.index
self.index += 1
digs1 = self.scanDecimalDigs()
# if no minimal number of digs provided then return no quantifier
if not digs1:
self.index = i
return None
# scan char limit if provided
if self.follows(','):
self.index += 1
digs2 = self.scanDecimalDigs()
else:
digs2 = ''
# must be valid!
if not self.follows('}'):
self.index = i
return None
else:
self.expect_character('}')
content = int(digs1), int(digs2) if digs2 else None
else:
return None
return content
def parseAtomEscape(self):
ch = self.source[self.index]
if isDecimalDigit(ch) and ch != 0:
digs = self.scanDecimalDigs()
elif ch in CHAR_CLASS_ESCAPE:
self.index += 1
return SpecialChar('\\' + ch)
else:
return self.parseCharacterEscape()
def parseCharacterEscape(self):
ch = self.source[self.index]
if ch in CONTROL_ESCAPE_CHARS:
return SpecialChar('\\' + ch)
if ch == 'c':
'ok, fuck this shit.'
def scanDecimalDigs(self):
s = self.index
while not self.isEOF() and isDecimalDigit(self.source[self.index]):
self.index += 1
return self.source[s:self.index]
a = JsRegExpParser('a(?=x)', '')
print(a.parsePattern())
+688
View File
@@ -0,0 +1,688 @@
from __future__ import unicode_literals
from pyjsparser.pyjsparserdata import *
from .friendly_nodes import *
import random
import six
if six.PY3:
from functools import reduce
xrange = range
unicode = str
# number of characters above which expression will be split to multiple lines in order to avoid python parser stack overflow
# still experimental so I suggest to set it to 400 in order to avoid common errors
# set it to smaller value only if you have problems with parser stack overflow
LINE_LEN_LIMIT = 400 # 200 # or any other value - the larger the smaller probability of errors :)
class ForController:
def __init__(self):
self.inside = [False]
self.update = ''
def enter_for(self, update):
self.inside.append(True)
self.update = update
def leave_for(self):
self.inside.pop()
def enter_other(self):
self.inside.append(False)
def leave_other(self):
self.inside.pop()
def is_inside(self):
return self.inside[-1]
class InlineStack:
NAME = 'PyJs_%s_%d_'
def __init__(self):
self.reps = {}
self.names = []
def inject_inlines(self, source):
for lval in self.names: # first in first out! Its important by the way
source = inject_before_lval(source, lval, self.reps[lval])
return source
def require(self, typ):
name = self.NAME % (typ, len(self.names))
self.names.append(name)
return name
def define(self, name, val):
self.reps[name] = val
def reset(self):
self.rel = {}
self.names = []
class ContextStack:
def __init__(self):
self.to_register = set([])
self.to_define = {}
def reset(self):
self.to_register = set([])
self.to_define = {}
def register(self, var):
self.to_register.add(var)
def define(self, name, code):
self.to_define[name] = code
self.register(name)
def get_code(self):
code = 'var.registers([%s])\n' % ', '.join(
repr(e) for e in self.to_register)
for name, func_code in six.iteritems(self.to_define):
code += func_code
return code
def clean_stacks():
global Context, inline_stack
Context = ContextStack()
inline_stack = InlineStack()
def to_key(literal_or_identifier):
''' returns string representation of this object'''
if literal_or_identifier['type'] == 'Identifier':
return literal_or_identifier['name']
elif literal_or_identifier['type'] == 'Literal':
k = literal_or_identifier['value']
if isinstance(k, float):
return unicode(float_repr(k))
elif 'regex' in literal_or_identifier:
return compose_regex(k)
elif isinstance(k, bool):
return 'true' if k else 'false'
elif k is None:
return 'null'
else:
return unicode(k)
def trans(ele, standard=False):
"""Translates esprima syntax tree to python by delegating to appropriate translating node"""
try:
node = globals().get(ele['type'])
if not node:
raise NotImplementedError('%s is not supported!' % ele['type'])
if standard:
node = node.__dict__[
'standard'] if 'standard' in node.__dict__ else node
return node(**ele)
except:
#print ele
raise
def limited(func):
'''Decorator limiting resulting line length in order to avoid python parser stack overflow -
If expression longer than LINE_LEN_LIMIT characters then it will be moved to upper line
USE ONLY ON EXPRESSIONS!!! '''
def f(standard=False, **args):
insert_pos = len(
inline_stack.names
) # in case line is longer than limit we will have to insert the lval at current position
# this is because calling func will change inline_stack.
# we cant use inline_stack.require here because we dont know whether line overflows yet
res = func(**args)
if len(res) > LINE_LEN_LIMIT:
name = inline_stack.require('LONG')
inline_stack.names.pop()
inline_stack.names.insert(insert_pos, name)
res = 'def %s(var=var):\n return %s\n' % (name, res)
inline_stack.define(name, res)
return name + '()'
else:
return res
f.__dict__['standard'] = func
return f
# ==== IDENTIFIERS AND LITERALS =======
inf = float('inf')
def Literal(type, value, raw, regex=None, comments=None):
if regex: # regex
return 'JsRegExp(%s)' % repr(compose_regex(value))
elif value is None: # null
return 'var.get(u"null")'
# Todo template
# String, Bool, Float
return 'Js(%s)' % repr(value) if value != inf else 'Js(float("inf"))'
def Identifier(type, name, comments=None):
return 'var.get(%s)' % repr(name)
@limited
def MemberExpression(type, computed, object, property, comments=None):
far_left = trans(object)
if computed: # obj[prop] type accessor
# may be literal which is the same in every case so we can save some time on conversion
if property['type'] == 'Literal':
prop = repr(to_key(property))
else: # worst case
prop = trans(property)
else: # always the same since not computed (obj.prop accessor)
prop = repr(to_key(property))
return far_left + '.get(%s)' % prop
def ThisExpression(type, comments=None):
return 'var.get(u"this")'
@limited
def CallExpression(type, callee, arguments, comments=None):
arguments = [trans(e) for e in arguments]
if callee['type'] == 'MemberExpression':
far_left = trans(callee['object'])
if callee['computed']: # obj[prop] type accessor
# may be literal which is the same in every case so we can save some time on conversion
if callee['property']['type'] == 'Literal':
prop = repr(to_key(callee['property']))
else: # worst case
prop = trans(
callee['property']) # its not a string literal! so no repr
else: # always the same since not computed (obj.prop accessor)
prop = repr(to_key(callee['property']))
arguments.insert(0, prop)
return far_left + '.callprop(%s)' % ', '.join(arguments)
else: # standard call
return trans(callee) + '(%s)' % ', '.join(arguments)
# ========== ARRAYS ============
def ArrayExpression(type, elements, comments=None): # todo fix null inside problem
return 'Js([%s])' % ', '.join(trans(e) if e else 'None' for e in elements)
# ========== OBJECTS =============
def ObjectExpression(type, properties, comments=None):
name = inline_stack.require('Object')
elems = []
after = ''
for p in properties:
if p['kind'] == 'init':
elems.append('%s:%s' % Property(**p))
elif p['kind'] == 'set':
k, setter = Property(
**p
) # setter is just a lval referring to that function, it will be defined in InlineStack automatically
after += '%s.define_own_property(%s, {"set":%s, "configurable":True, "enumerable":True})\n' % (
name, k, setter)
elif p['kind'] == 'get':
k, getter = Property(**p)
after += '%s.define_own_property(%s, {"get":%s, "configurable":True, "enumerable":True})\n' % (
name, k, getter)
else:
raise RuntimeError('Unexpected object propery kind')
obj = '%s = Js({%s})\n' % (name, ','.join(elems))
inline_stack.define(name, obj + after)
return name
def Property(type, kind, key, computed, value, method, shorthand, comments=None):
if shorthand or computed:
raise NotImplementedError(
'Shorthand and Computed properties not implemented!')
k = to_key(key)
if k is None:
raise SyntaxError('Invalid key in dictionary! Or bug in Js2Py')
v = trans(value)
return repr(k), v
# ========== EXPRESSIONS ============
@limited
def UnaryExpression(type, operator, argument, prefix, comments=None):
a = trans(
argument, standard=True
) # unary involve some complex operations so we cant use line shorteners here
if operator == 'delete':
if argument['type'] in ('Identifier', 'MemberExpression'):
# means that operation is valid
return js_delete(a)
return 'PyJsComma(%s, Js(True))' % a # otherwise not valid, just perform expression and return true.
elif operator == 'typeof':
return js_typeof(a)
return UNARY[operator](a)
@limited
def BinaryExpression(type, operator, left, right, comments=None):
a = trans(left)
b = trans(right)
# delegate to our friends
return BINARY[operator](a, b)
@limited
def UpdateExpression(type, operator, argument, prefix, comments=None):
a = trans(
argument, standard=True
) # also complex operation involving parsing of the result so no line length reducing here
return js_postfix(a, operator == '++', not prefix)
@limited
def AssignmentExpression(type, operator, left, right, comments=None):
operator = operator[:-1]
if left['type'] == 'Identifier':
if operator:
return 'var.put(%s, %s, %s)' % (repr(to_key(left)), trans(right),
repr(operator))
else:
return 'var.put(%s, %s)' % (repr(to_key(left)), trans(right))
elif left['type'] == 'MemberExpression':
far_left = trans(left['object'])
if left['computed']: # obj[prop] type accessor
# may be literal which is the same in every case so we can save some time on conversion
if left['property']['type'] == 'Literal':
prop = repr(to_key(left['property']))
else: # worst case
prop = trans(
left['property']) # its not a string literal! so no repr
else: # always the same since not computed (obj.prop accessor)
prop = repr(to_key(left['property']))
if operator:
return far_left + '.put(%s, %s, %s)' % (prop, trans(right),
repr(operator))
else:
return far_left + '.put(%s, %s)' % (prop, trans(right))
else:
raise SyntaxError('Invalid left hand side in assignment!')
six
@limited
def SequenceExpression(type, expressions, comments=None):
return reduce(js_comma, (trans(e) for e in expressions))
@limited
def NewExpression(type, callee, arguments, comments=None):
return trans(callee) + '.create(%s)' % ', '.join(
trans(e) for e in arguments)
@limited
def ConditionalExpression(
type, test, consequent,
alternate, comments=None): # caused plenty of problems in my home-made translator :)
return '(%s if %s else %s)' % (trans(consequent), trans(test),
trans(alternate))
# =========== STATEMENTS =============
def BlockStatement(type, body, comments=None):
return StatementList(
body) # never returns empty string! In the worst case returns pass\n
def ExpressionStatement(type, expression, comments=None):
return trans(expression) + '\n' # end expression space with new line
def BreakStatement(type, label, comments=None):
if label:
return 'raise %s("Breaked")\n' % (get_break_label(label['name']))
else:
return 'break\n'
def ContinueStatement(type, label, comments=None):
if label:
return 'raise %s("Continued")\n' % (get_continue_label(label['name']))
else:
return 'continue\n'
def ReturnStatement(type, argument, comments=None):
return 'return %s\n' % (trans(argument)
if argument else "var.get('undefined')")
def EmptyStatement(type, comments=None):
return 'pass\n'
def DebuggerStatement(type, comments=None):
return 'pass\n'
def DoWhileStatement(type, body, test, comments=None):
inside = trans(body) + 'if not %s:\n' % trans(test) + indent('break\n')
result = 'while 1:\n' + indent(inside)
return result
def ForStatement(type, init, test, update, body, comments=None):
update = indent(trans(update)) if update else ''
init = trans(init) if init else ''
if not init.endswith('\n'):
init += '\n'
test = trans(test) if test else '1'
if not update:
result = '#for JS loop\n%swhile %s:\n%s%s\n' % (
init, test, indent(trans(body)), update)
else:
result = '#for JS loop\n%swhile %s:\n' % (init, test)
body = 'try:\n%sfinally:\n %s\n' % (indent(trans(body)), update)
result += indent(body)
return result
def ForInStatement(type, left, right, body, each, comments=None):
res = 'for PyJsTemp in %s:\n' % trans(right)
if left['type'] == "VariableDeclaration":
addon = trans(left) # make sure variable is registered
if addon != 'pass\n':
res = addon + res # we have to execute this expression :(
# now extract the name
try:
name = left['declarations'][0]['id']['name']
except:
raise RuntimeError('Unusual ForIn loop')
elif left['type'] == 'Identifier':
name = left['name']
else:
raise RuntimeError('Unusual ForIn loop')
res += indent('var.put(%s, PyJsTemp)\n' % repr(name) + trans(body))
return res
def IfStatement(type, test, consequent, alternate, comments=None):
# NOTE we cannot do elif because function definition inside elif statement would not be possible!
IF = 'if %s:\n' % trans(test)
IF += indent(trans(consequent))
if not alternate:
return IF
ELSE = 'else:\n' + indent(trans(alternate))
return IF + ELSE
def LabeledStatement(type, label, body, comments=None):
# todo consider using smarter approach!
inside = trans(body)
defs = ''
if inside.startswith('while ') or inside.startswith(
'for ') or inside.startswith('#for'):
# we have to add contine label as well...
# 3 or 1 since #for loop type has more lines before real for.
sep = 1 if not inside.startswith('#for') else 3
cont_label = get_continue_label(label['name'])
temp = inside.split('\n')
injected = 'try:\n' + '\n'.join(temp[sep:])
injected += 'except %s:\n pass\n' % cont_label
inside = '\n'.join(temp[:sep]) + '\n' + indent(injected)
defs += 'class %s(Exception): pass\n' % cont_label
break_label = get_break_label(label['name'])
inside = 'try:\n%sexcept %s:\n pass\n' % (indent(inside), break_label)
defs += 'class %s(Exception): pass\n' % break_label
return defs + inside
def StatementList(lis, comments=None):
if lis: # ensure we don't return empty string because it may ruin indentation!
code = ''.join(trans(e) for e in lis)
return code if code else 'pass\n'
else:
return 'pass\n'
def PyimportStatement(type, imp, comments=None):
lib = imp['name']
jlib = 'PyImport_%s' % lib
code = 'import %s as %s\n' % (lib, jlib)
#check whether valid lib name...
try:
compile(code, '', 'exec')
except:
raise SyntaxError(
'Invalid Python module name (%s) in pyimport statement' % lib)
# var.pyimport will handle module conversion to PyJs object
code += 'var.pyimport(%s, %s)\n' % (repr(lib), jlib)
return code
def SwitchStatement(type, discriminant, cases, comments=None):
#TODO there will be a problem with continue in a switch statement.... FIX IT
code = 'while 1:\n' + indent('SWITCHED = False\nCONDITION = (%s)\n')
code = code % trans(discriminant)
for case in cases:
case_code = None
if case['test']: # case (x):
case_code = 'if SWITCHED or PyJsStrictEq(CONDITION, %s):\n' % (
trans(case['test']))
else: # default:
case_code = 'if True:\n'
case_code += indent('SWITCHED = True\n')
case_code += indent(StatementList(case['consequent']))
# one more indent for whole
code += indent(case_code)
# prevent infinite loop and sort out nested switch...
code += indent('SWITCHED = True\nbreak\n')
return code
def ThrowStatement(type, argument, comments=None):
return 'PyJsTempException = JsToPyException(%s)\nraise PyJsTempException\n' % trans(
argument)
def TryStatement(type, block, handler, handlers, guardedHandlers, finalizer, comments=None):
result = 'try:\n%s' % indent(trans(block))
# complicated catch statement...
if handler:
identifier = handler['param']['name']
holder = 'PyJsHolder_%s_%d' % (to_hex(identifier),
random.randrange(1e8))
identifier = repr(identifier)
result += 'except PyJsException as PyJsTempException:\n'
# fill in except ( catch ) block and remember to recover holder variable to its previous state
result += indent(
TRY_CATCH.replace('HOLDER',
holder).replace('NAME', identifier).replace(
'BLOCK', indent(trans(handler['body']))))
# translate finally statement if present
if finalizer:
result += 'finally:\n%s' % indent(trans(finalizer))
return result
def LexicalDeclaration(type, declarations, kind, comments=None):
raise NotImplementedError(
'let and const not implemented yet but they will be soon! Check github for updates.'
)
def VariableDeclarator(type, id, init, comments=None):
name = id['name']
# register the name if not already registered
Context.register(name)
if init:
return 'var.put(%s, %s)\n' % (repr(name), trans(init))
return ''
def VariableDeclaration(type, declarations, kind, comments=None):
code = ''.join(trans(d) for d in declarations)
return code if code else 'pass\n'
def WhileStatement(type, test, body, comments=None):
result = 'while %s:\n' % trans(test) + indent(trans(body))
return result
def WithStatement(type, object, body, comments=None):
raise NotImplementedError('With statement not implemented!')
def Program(type, body, comments=None):
inline_stack.reset()
code = ''.join(trans(e) for e in body)
# here add hoisted elements (register variables and define functions)
code = Context.get_code() + code
# replace all inline variables
code = inline_stack.inject_inlines(code)
return code
# ======== FUNCTIONS ============
def FunctionDeclaration(type, id, params, defaults, body, generator,
expression, comments=None):
if generator:
raise NotImplementedError('Generators not supported')
if defaults:
raise NotImplementedError('Defaults not supported')
if not id:
return FunctionExpression(type, id, params, defaults, body, generator,
expression) + '\n'
JsName = id['name']
PyName = 'PyJsHoisted_%s_' % JsName
PyName = PyName if is_valid_py_name(PyName) else 'PyJsHoistedNonPyName'
# this is quite complicated
global Context
previous_context = Context
# change context to the context of this function
Context = ContextStack()
# translate body within current context
code = trans(body)
# get arg names
vars = [v['name'] for v in params]
# args are automaticaly registered variables
Context.to_register.update(vars)
# add all hoisted elements inside function
code = Context.get_code() + code
# check whether args are valid python names:
used_vars = []
for v in vars:
if is_valid_py_name(v):
used_vars.append(v)
else: # invalid arg in python, for example $, replace with alternatice arg
used_vars.append('PyJsArg_%s_' % to_hex(v))
header = '@Js\n'
header += 'def %s(%sthis, arguments, var=var):\n' % (
PyName, ', '.join(used_vars) + (', ' if vars else ''))
# transfer names from Py scope to Js scope
arg_map = dict(zip(vars, used_vars))
arg_map.update({'this': 'this', 'arguments': 'arguments'})
arg_conv = 'var = Scope({%s}, var)\n' % ', '.join(
repr(k) + ':' + v for k, v in six.iteritems(arg_map))
# and finally set the name of the function to its real name:
footer = '%s.func_name = %s\n' % (PyName, repr(JsName))
footer += 'var.put(%s, %s)\n' % (repr(JsName), PyName)
whole_code = header + indent(arg_conv + code) + footer
# restore context
Context = previous_context
# define in upper context
Context.define(JsName, whole_code)
return 'pass\n'
def FunctionExpression(type, id, params, defaults, body, generator,
expression, comments=None):
if generator:
raise NotImplementedError('Generators not supported')
if defaults:
raise NotImplementedError('Defaults not supported')
JsName = id['name'] if id else 'anonymous'
if not is_valid_py_name(JsName):
ScriptName = 'InlineNonPyName'
else:
ScriptName = JsName
PyName = inline_stack.require(ScriptName) # this is unique
# again quite complicated
global Context
previous_context = Context
# change context to the context of this function
Context = ContextStack()
# translate body within current context
code = trans(body)
# get arg names
vars = [v['name'] for v in params]
# args are automaticaly registered variables
Context.to_register.update(vars)
# add all hoisted elements inside function
code = Context.get_code() + code
# check whether args are valid python names:
used_vars = []
for v in vars:
if is_valid_py_name(v):
used_vars.append(v)
else: # invalid arg in python, for example $, replace with alternatice arg
used_vars.append('PyJsArg_%s_' % to_hex(v))
header = '@Js\n'
header += 'def %s(%sthis, arguments, var=var):\n' % (
PyName, ', '.join(used_vars) + (', ' if vars else ''))
# transfer names from Py scope to Js scope
arg_map = dict(zip(vars, used_vars))
arg_map.update({'this': 'this', 'arguments': 'arguments'})
if id: # make self available from inside...
if id['name'] not in arg_map:
arg_map[id['name']] = PyName
arg_conv = 'var = Scope({%s}, var)\n' % ', '.join(
repr(k) + ':' + v for k, v in six.iteritems(arg_map))
# and finally set the name of the function to its real name:
footer = '%s._set_name(%s)\n' % (PyName, repr(JsName))
whole_code = header + indent(arg_conv + code) + footer
# restore context
Context = previous_context
# define in upper context
inline_stack.define(PyName, whole_code)
return PyName
LogicalExpression = BinaryExpression
PostfixExpression = UpdateExpression
clean_stacks()
if __name__ == '__main__':
import codecs
import time
import pyjsparser
c = None #'''`ijfdij`'''
if not c:
with codecs.open("esp.js", "r", "utf-8") as f:
c = f.read()
print('Started')
t = time.time()
res = trans(pyjsparser.PyJsParser().parse(c))
dt = time.time() - t + 0.000000001
print('Translated everyting in', round(dt, 5), 'seconds.')
print('Thats %d characters per second' % int(len(c) / dt))
with open('res.py', 'w') as f:
f.write(res)
+189
View File
@@ -0,0 +1,189 @@
import pyjsparser
import pyjsparser.parser
from . import translating_nodes
import hashlib
import re
# Enable Js2Py exceptions and pyimport in parser
pyjsparser.parser.ENABLE_PYIMPORT = True
# the re below is how we'll recognise numeric constants.
# it finds any 'simple numeric that is not preceded with an alphanumeric character
# the numeric can be a float (so a dot is found) but
# it does not recognise notation such as 123e5, 0xFF, infinity or NaN
CP_NUMERIC_RE = re.compile(r'(?<![a-zA-Z0-9_"\'])([0-9\.]+)')
CP_NUMERIC_PLACEHOLDER = '__PyJsNUM_%i_PyJsNUM__'
CP_NUMERIC_PLACEHOLDER_REVERSE_RE = re.compile(
CP_NUMERIC_PLACEHOLDER.replace('%i', r'([0-9\.]+)'))
# the re below is how we'll recognise string constants
# it finds a ' or ", then reads until the next matching ' or "
# this re only services simple cases, it can not be used when
# there are escaped quotes in the expression
#CP_STRING_1 = re.compile(r'(["\'])(.*?)\1') # this is how we'll recognise string constants
CP_STRING = '"([^\\\\"]+|\\\\([bfnrtv\'"\\\\]|[0-3]?[0-7]{1,2}|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}))*"|\'([^\\\\\']+|\\\\([bfnrtv\'"\\\\]|[0-3]?[0-7]{1,2}|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}))*\''
CP_STRING_RE = re.compile(
CP_STRING) # this is how we'll recognise string constants
CP_STRING_PLACEHOLDER = '__PyJsSTR_%i_PyJsSTR__'
CP_STRING_PLACEHOLDER_REVERSE_RE = re.compile(
CP_STRING_PLACEHOLDER.replace('%i', r'([0-9\.]+)'))
cache = {}
# This crap is still needed but I removed it for speed reasons. Have to think ofa better idea
# import js2py.pyjs, sys
# # Redefine builtin objects... Do you have a better idea?
# for m in list(sys.modules):
# if m.startswith('js2py'):
# del sys.modules[m]
# del js2py.pyjs
# del js2py
DEFAULT_HEADER = u'''from js2py.pyjs import *
# setting scope
var = Scope( JS_BUILTINS )
set_global_object(var)
# Code follows:
'''
def dbg(x):
"""does nothing, legacy dummy function"""
return ''
def translate_js(js, HEADER=DEFAULT_HEADER, use_compilation_plan=False):
"""js has to be a javascript source code.
returns equivalent python code."""
if use_compilation_plan and not '//' in js and not '/*' in js:
return translate_js_with_compilation_plan(js, HEADER=HEADER)
parser = pyjsparser.PyJsParser()
parsed = parser.parse(js) # js to esprima syntax tree
# Another way of doing that would be with my auto esprima translation but its much slower and causes import problems:
# parsed = esprima.parse(js).to_dict()
translating_nodes.clean_stacks()
return HEADER + translating_nodes.trans(
parsed) # syntax tree to python code
class match_unumerator(object):
"""This class ise used """
matchcount = -1
def __init__(self, placeholder_mask):
self.placeholder_mask = placeholder_mask
self.matches = []
def __call__(self, match):
self.matchcount += 1
self.matches.append(match.group(0))
return self.placeholder_mask % self.matchcount
def __repr__(self):
return '\n'.join(self.placeholder_mask % counter + '=' + match
for counter, match in enumerate(self.matches))
def wrap_up(self, output):
for counter, value in enumerate(self.matches):
output = output.replace(
"u'" + self.placeholder_mask % (counter) + "'", value, 1)
return output
def get_compilation_plan(js):
match_increaser_str = match_unumerator(CP_STRING_PLACEHOLDER)
compilation_plan = re.sub(CP_STRING, match_increaser_str, js)
match_increaser_num = match_unumerator(CP_NUMERIC_PLACEHOLDER)
compilation_plan = re.sub(CP_NUMERIC_RE, match_increaser_num,
compilation_plan)
# now put quotes, note that just patching string replaces is somewhat faster than
# using another re:
compilation_plan = compilation_plan.replace(
'__PyJsNUM_', '"__PyJsNUM_').replace('_PyJsNUM__', '_PyJsNUM__"')
compilation_plan = compilation_plan.replace(
'__PyJsSTR_', '"__PyJsSTR_').replace('_PyJsSTR__', '_PyJsSTR__"')
return match_increaser_str, match_increaser_num, compilation_plan
def translate_js_with_compilation_plan(js, HEADER=DEFAULT_HEADER):
"""js has to be a javascript source code.
returns equivalent python code.
compile plans only work with the following restrictions:
- only enabled for oneliner expressions
- when there are comments in the js code string substitution is disabled
- when there nested escaped quotes string substitution is disabled, so
cacheable:
Q1 == 1 && name == 'harry'
not cacheable:
Q1 == 1 && name == 'harry' // some comment
not cacheable:
Q1 == 1 && name == 'o\'Reilly'
not cacheable:
Q1 == 1 && name /* some comment */ == 'o\'Reilly'
"""
match_increaser_str, match_increaser_num, compilation_plan = get_compilation_plan(
js)
cp_hash = hashlib.md5(compilation_plan.encode('utf-8')).digest()
try:
python_code = cache[cp_hash]['proto_python_code']
except:
parser = pyjsparser.PyJsParser()
parsed = parser.parse(compilation_plan) # js to esprima syntax tree
# Another way of doing that would be with my auto esprima translation but its much slower and causes import problems:
# parsed = esprima.parse(js).to_dict()
translating_nodes.clean_stacks()
python_code = translating_nodes.trans(
parsed) # syntax tree to python code
cache[cp_hash] = {
'compilation_plan': compilation_plan,
'proto_python_code': python_code,
}
python_code = match_increaser_str.wrap_up(python_code)
python_code = match_increaser_num.wrap_up(python_code)
return HEADER + python_code
def trasnlate(js, HEADER=DEFAULT_HEADER):
"""js has to be a javascript source code.
returns equivalent python code.
Equivalent to translate_js"""
return translate_js(js, HEADER)
syntax_tree_translate = translating_nodes.trans
if __name__ == '__main__':
PROFILE = False
import js2py
import codecs
def main():
with codecs.open("esprima.js", "r", "utf-8") as f:
d = f.read()
r = js2py.translate_js(d)
with open('res.py', 'wb') as f2:
f2.write(r)
exec (r, {})
if PROFILE:
import cProfile
cProfile.run('main()', sort='tottime')
else:
main()