KoD 1.7.7

- fix di routine ai canali/server
This commit is contained in:
marco
2023-06-30 19:39:03 +02:00
parent c3e02636fb
commit d29efd4ec2
68 changed files with 1784 additions and 543 deletions

View File

@@ -120,7 +120,7 @@ def update_title(item):
if item.from_title_tmdb: del item.from_title_tmdb
if not item.from_update and item.from_title: del item.from_title
if item.contentSerieName: # We copy the title to serve as a reference in the "Complete Information" menu
if item.contentSerieName and item.contentType == 'tvshow': # We copy the title to serve as a reference in the "Complete Information" menu
item.infoLabels['originaltitle'] = item.contentSerieName
item.contentTitle = item.contentSerieName
else:

View File

@@ -126,7 +126,7 @@ def HJs(val):
except Exception as e:
message = 'your Python function failed! '
try:
message += e.message
message += str(e)
except:
pass
raise MakeError('Error', message)
@@ -319,7 +319,7 @@ class PyJs(object):
#prop = prop.value
if self.Class == 'Undefined' or self.Class == 'Null':
raise MakeError('TypeError',
'Undefined and null dont have properties!')
'Undefined and null dont have properties (tried getting property %s)' % repr(prop))
if not isinstance(prop, basestring):
prop = prop.to_string().value
if not isinstance(prop, basestring): raise RuntimeError('Bug')
@@ -361,7 +361,7 @@ class PyJs(object):
* / % + - << >> & ^ |'''
if self.Class == 'Undefined' or self.Class == 'Null':
raise MakeError('TypeError',
'Undefined and null dont have properties!')
'Undefined and null don\'t have properties (tried setting property %s)' % repr(prop))
if not isinstance(prop, basestring):
prop = prop.to_string().value
if NUMPY_AVAILABLE and prop.isdigit():
@@ -991,7 +991,8 @@ class PyJs(object):
cand = self.get(prop)
if not cand.is_callable():
raise MakeError('TypeError',
'%s is not a function' % cand.typeof())
'%s is not a function (tried calling property %s of %s)' % (
cand.typeof(), repr(prop), repr(self.Class)))
return cand.call(self, args)
def to_python(self):
@@ -1304,7 +1305,7 @@ class PyObjectWrapper(PyJs):
except Exception as e:
message = 'your Python function failed! '
try:
message += e.message
message += str(e)
except:
pass
raise MakeError('Error', message)
@@ -1464,9 +1465,11 @@ class PyJsFunction(PyJs):
except NotImplementedError:
raise
except RuntimeError as e: # maximum recursion
raise MakeError(
'RangeError', e.message if
not isinstance(e, NotImplementedError) else 'Not implemented!')
try:
msg = e.message
except:
msg = repr(e)
raise MakeError('RangeError', msg)
def has_instance(self, other):
# I am not sure here so instanceof may not work lol.

View File

@@ -32,8 +32,7 @@ def UTC(year, month, date, hours, minutes, seconds, ms): # todo complete this
mili = args[6].to_number() if l > 6 else Js(0)
if not y.is_nan() and 0 <= y.value <= 99:
y = y + Js(1900)
t = TimeClip(MakeDate(MakeDay(y, m, dt), MakeTime(h, mi, sec, mili)))
return PyJsDate(t, prototype=DatePrototype)
return TimeClip(MakeDate(MakeDay(y, m, dt), MakeTime(h, mi, sec, mili)))
@Js
@@ -76,11 +75,12 @@ class PyJsDate(PyJs):
# todo fix this problematic datetime part
def to_local_dt(self):
return datetime.datetime.utcfromtimestamp(
UTCToLocal(self.value) // 1000)
return datetime.datetime(1970, 1, 1) + datetime.timedelta(
seconds=UTCToLocal(self.value) // 1000)
def to_utc_dt(self):
return datetime.datetime.utcfromtimestamp(self.value // 1000)
return datetime.datetime(1970, 1, 1) + datetime.timedelta(
seconds=self.value // 1000)
def local_strftime(self, pattern):
if self.value is NaN:
@@ -118,21 +118,40 @@ class PyJsDate(PyJs):
def parse_date(py_string): # todo support all date string formats
try:
date_formats = (
"%Y-%m-%d",
"%m/%d/%Y",
"%b %d %Y",
)
# Supports these hour formats and with or hour.
hour_formats = (
"T%H:%M:%S.%f",
"T%H:%M:%S",
) + ('',)
# Supports with or without Z indicator.
z_formats = ("Z",) + ('',)
supported_formats = [
d + t + z
for d in date_formats
for t in hour_formats
for z in z_formats
]
for date_format in supported_formats:
try:
dt = datetime.datetime.strptime(py_string, "%Y-%m-%dT%H:%M:%S.%fZ")
except:
dt = datetime.datetime.strptime(py_string, "%Y-%m-%dT%H:%M:%SZ")
return MakeDate(
MakeDay(Js(dt.year), Js(dt.month - 1), Js(dt.day)),
MakeTime(
Js(dt.hour), Js(dt.minute), Js(dt.second),
Js(dt.microsecond // 1000)))
except:
raise MakeError(
'TypeError',
'Could not parse date %s - unsupported date format. Currently only supported format is RFC3339 utc. Sorry!'
% py_string)
dt = datetime.datetime.strptime(py_string, date_format)
except ValueError:
continue
else:
return MakeDate(
MakeDay(Js(dt.year), Js(dt.month - 1), Js(dt.day)),
MakeTime(
Js(dt.hour), Js(dt.minute), Js(dt.second),
Js(dt.microsecond // 1000)))
raise MakeError(
'TypeError',
'Could not parse date %s - unsupported date format. Currently only supported formats are RFC3339 utc, ISO Date, Short Date, and Long Date. Sorry!'
% py_string)
def date_constructor(*args):
@@ -332,7 +351,7 @@ class DateProto:
check_date(this)
t = UTCToLocal(this.value)
tim = MakeTime(
HourFromTime(t), MinFromTime(t), SecFromTime(t), ms.to_int())
Js(HourFromTime(t)), Js(MinFromTime(t)), Js(SecFromTime(t)), ms)
u = TimeClip(LocalToUTC(MakeDate(Day(t), tim)))
this.value = u
return u
@@ -341,12 +360,164 @@ class DateProto:
check_date(this)
t = this.value
tim = MakeTime(
HourFromTime(t), MinFromTime(t), SecFromTime(t), ms.to_int())
Js(HourFromTime(t)), Js(MinFromTime(t)), Js(SecFromTime(t)), ms)
u = TimeClip(MakeDate(Day(t), tim))
this.value = u
return u
# todo Complete all setters!
def setSeconds(sec, ms=None):
    """Set the seconds (and optionally milliseconds) of this date, local time.

    :param sec: JS value; ``sec.to_number()`` becomes the new seconds.
    :param ms: optional JS value for the milliseconds. When it is not
        supplied the date's current milliseconds are kept.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = UTCToLocal(this.value)
    s = sec.to_number()
    # The original test was inverted (``if not ms is None``): a supplied value
    # was ignored and an omitted one failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(
        Day(t), MakeTime(Js(HourFromTime(t)), Js(MinFromTime(t)), s, milli))
    u = TimeClip(LocalToUTC(date))
    this.value = u
    return u
def setUTCSeconds(sec, ms=None):
    """Set the seconds (and optionally milliseconds) of this date.

    Works directly on ``this.value`` without any local-time conversion.

    :param sec: JS value; ``sec.to_number()`` becomes the new seconds.
    :param ms: optional JS value for the milliseconds. When it is not
        supplied the date's current milliseconds are kept.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = this.value
    s = sec.to_number()
    # The original test was inverted (``if not ms is None``): a supplied value
    # was ignored and an omitted one failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(
        Day(t), MakeTime(Js(HourFromTime(t)), Js(MinFromTime(t)), s, milli))
    v = TimeClip(date)
    this.value = v
    return v
def setMinutes(min, sec=None, ms=None):
    """Set the minutes (optionally seconds and milliseconds), local time.

    :param min: JS value; ``min.to_number()`` becomes the new minutes.
    :param sec: optional JS value for the seconds; current seconds are kept
        when it is not supplied.
    :param ms: optional JS value for the milliseconds; current milliseconds
        are kept when it is not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = UTCToLocal(this.value)
    m = min.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if sec is None or getattr(sec, 'Class', None) == 'Undefined':
        s = Js(SecFromTime(t))
    else:
        s = sec.to_number()
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(Day(t), MakeTime(Js(HourFromTime(t)), m, s, milli))
    u = TimeClip(LocalToUTC(date))
    this.value = u
    return u
def setUTCMinutes(min, sec=None, ms=None):
    """Set the minutes (optionally seconds and milliseconds) of this date.

    Works directly on ``this.value`` without any local-time conversion.

    :param min: JS value; ``min.to_number()`` becomes the new minutes.
    :param sec: optional JS value for the seconds; current seconds are kept
        when it is not supplied.
    :param ms: optional JS value for the milliseconds; current milliseconds
        are kept when it is not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = this.value
    m = min.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if sec is None or getattr(sec, 'Class', None) == 'Undefined':
        s = Js(SecFromTime(t))
    else:
        s = sec.to_number()
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(Day(t), MakeTime(Js(HourFromTime(t)), m, s, milli))
    v = TimeClip(date)
    this.value = v
    return v
def setHours(hour, min=None, sec=None, ms=None):
    """Set the hours (optionally minutes, seconds, milliseconds), local time.

    :param hour: JS value; ``hour.to_number()`` becomes the new hours.
    :param min: optional JS value for the minutes.
    :param sec: optional JS value for the seconds.
    :param ms: optional JS value for the milliseconds.
        Each optional component keeps its current value when not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = UTCToLocal(this.value)
    h = hour.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if min is None or getattr(min, 'Class', None) == 'Undefined':
        m = Js(MinFromTime(t))
    else:
        m = min.to_number()
    if sec is None or getattr(sec, 'Class', None) == 'Undefined':
        s = Js(SecFromTime(t))
    else:
        s = sec.to_number()
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(Day(t), MakeTime(h, m, s, milli))
    u = TimeClip(LocalToUTC(date))
    this.value = u
    return u
def setUTCHours(hour, min=None, sec=None, ms=None):
    """Set the hours (optionally minutes, seconds, milliseconds) of this date.

    Works directly on ``this.value`` without any local-time conversion.

    :param hour: JS value; ``hour.to_number()`` becomes the new hours.
    :param min: optional JS value for the minutes.
    :param sec: optional JS value for the seconds.
    :param ms: optional JS value for the milliseconds.
        Each optional component keeps its current value when not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = this.value
    h = hour.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if min is None or getattr(min, 'Class', None) == 'Undefined':
        m = Js(MinFromTime(t))
    else:
        m = min.to_number()
    if sec is None or getattr(sec, 'Class', None) == 'Undefined':
        s = Js(SecFromTime(t))
    else:
        s = sec.to_number()
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(Day(t), MakeTime(h, m, s, milli))
    v = TimeClip(date)
    this.value = v
    return v
def setDate(date):
    """Replace the day component of this date, working in local time.

    :param date: JS value; ``date.to_number()`` is passed to ``MakeDay`` as
        the day, together with the date's current year and month.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    local_time = UTCToLocal(this.value)
    day_number = date.to_number()
    day = MakeDay(
        Js(YearFromTime(local_time)), Js(MonthFromTime(local_time)),
        day_number)
    # The time-of-day part is carried over unchanged.
    result = TimeClip(LocalToUTC(MakeDate(day, TimeWithinDay(local_time))))
    this.value = result
    return result
def setUTCDate(date):
    """Replace the day component of this date, without local conversion.

    :param date: JS value; ``date.to_number()`` is passed to ``MakeDay`` as
        the day, together with the date's current year and month.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    time_value = this.value
    day_number = date.to_number()
    day = MakeDay(
        Js(YearFromTime(time_value)), Js(MonthFromTime(time_value)),
        day_number)
    # The time-of-day part is carried over unchanged.
    result = TimeClip(MakeDate(day, TimeWithinDay(time_value)))
    this.value = result
    return result
def setMonth(month, date=None):
    """Set the month (and optionally the day) of this date, local time.

    :param month: JS value; ``month.to_number()`` becomes the new month.
    :param date: optional JS value for the day; the current day is kept when
        it is not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = UTCToLocal(this.value)
    m = month.to_number()
    # The original test was inverted (``if not date is None``): a supplied
    # value was ignored and an omitted one failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if date is None or getattr(date, 'Class', None) == 'Undefined':
        dt = Js(DateFromTime(t))
    else:
        dt = date.to_number()
    newDate = MakeDate(
        MakeDay(Js(YearFromTime(t)), m, dt), TimeWithinDay(t))
    u = TimeClip(LocalToUTC(newDate))
    this.value = u
    return u
def setUTCMonth(month, date=None):
    """Set the month (and optionally the day) of this date.

    Works directly on ``this.value`` without any local-time conversion.

    :param month: JS value; ``month.to_number()`` becomes the new month.
    :param date: optional JS value for the day; the current day is kept when
        it is not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = this.value
    m = month.to_number()
    # The original test was inverted (``if not date is None``): a supplied
    # value was ignored and an omitted one failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if date is None or getattr(date, 'Class', None) == 'Undefined':
        dt = Js(DateFromTime(t))
    else:
        dt = date.to_number()
    newDate = MakeDate(
        MakeDay(Js(YearFromTime(t)), m, dt), TimeWithinDay(t))
    v = TimeClip(newDate)
    this.value = v
    return v
def setFullYear(year, month=None, date=None):
    """Set the year (optionally month and day) of this date, local time.

    A NaN time value is treated as time 0 before the components are read.

    :param year: JS value; ``year.to_number()`` becomes the new year.
    :param month: optional JS value for the month.
    :param date: optional JS value for the day.
        Each optional component keeps its current value when not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    if this.value is not NaN:
        t = UTCToLocal(this.value)
    else:
        t = 0
    y = year.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if month is None or getattr(month, 'Class', None) == 'Undefined':
        m = Js(MonthFromTime(t))
    else:
        m = month.to_number()
    if date is None or getattr(date, 'Class', None) == 'Undefined':
        dt = Js(DateFromTime(t))
    else:
        dt = date.to_number()
    newDate = MakeDate(
        MakeDay(y, m, dt), TimeWithinDay(t))
    u = TimeClip(LocalToUTC(newDate))
    this.value = u
    return u
def setUTCFullYear(year, month=None, date=None):
    """Set the year (optionally month and day) of this date.

    Works directly on ``this.value`` without any local-time conversion. A NaN
    time value is treated as time 0 before the components are read.

    :param year: JS value; ``year.to_number()`` becomes the new year.
    :param month: optional JS value for the month.
    :param date: optional JS value for the day.
        Each optional component keeps its current value when not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    # The original converted through UTCToLocal here although the result is
    # stored without LocalToUTC; the other UTC setters read this.value as is.
    if this.value is not NaN:
        t = this.value
    else:
        t = 0
    y = year.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if month is None or getattr(month, 'Class', None) == 'Undefined':
        m = Js(MonthFromTime(t))
    else:
        m = month.to_number()
    if date is None or getattr(date, 'Class', None) == 'Undefined':
        dt = Js(DateFromTime(t))
    else:
        dt = date.to_number()
    newDate = MakeDate(
        MakeDay(y, m, dt), TimeWithinDay(t))
    v = TimeClip(newDate)
    this.value = v
    return v
def toUTCString():
check_date(this)

View File

@@ -36,8 +36,8 @@ def DaylightSavingTA(t):
return t
try:
return int(
LOCAL_ZONE.dst(datetime.datetime.utcfromtimestamp(
t // 1000)).seconds) * 1000
LOCAL_ZONE.dst(datetime.datetime(1970, 1, 1) + datetime.timedelta(
seconds=t // 1000)).seconds) * 1000
except:
warnings.warn(
'Invalid datetime date, assumed DST time, may be inaccurate...',

View File

@@ -53,7 +53,7 @@ def write_file_contents(path_or_file, contents):
if hasattr(path_or_file, 'write'):
path_or_file.write(contents)
else:
with open(path_as_local(path_or_file), 'w') as f:
with codecs.open(path_as_local(path_or_file), "w", "utf-8") as f:
f.write(contents)
@@ -238,6 +238,10 @@ class EvalJs(object):
self.execute_debug(code)
return self['PyJsEvalResult']
@property
def context(self):
return self._context
def __getattr__(self, var):
return getattr(self._var, var)
@@ -268,14 +272,3 @@ class EvalJs(object):
else:
sys.stderr.write('EXCEPTION: ' + str(e) + '\n')
time.sleep(0.01)
#print x
if __name__ == '__main__':
#with open('C:\Users\Piotrek\Desktop\esprima.js', 'rb') as f:
# x = f.read()
e = EvalJs()
e.execute('square(x)')
#e.execute(x)
e.console()

View File

@@ -6,7 +6,7 @@ def console():
@Js
def log():
print(arguments[0])
print(" ".join(repr(element) for element in arguments.to_list()))
console.put('log', log)
console.put('debug', log)

View File

@@ -0,0 +1 @@
from .seval import eval_js_vm

View File

@@ -602,11 +602,12 @@ class PyJsDate(PyJs):
# todo fix this problematic datetime part
def to_local_dt(self):
return datetime.datetime.utcfromtimestamp(
self.UTCToLocal(self.value) // 1000)
return datetime.datetime(1970, 1, 1) + datetime.timedelta(
seconds=self.UTCToLocal(self.value) // 1000)
def to_utc_dt(self):
return datetime.datetime.utcfromtimestamp(self.value // 1000)
return datetime.datetime(1970, 1, 1) + datetime.timedelta(
seconds=self.value // 1000)
def local_strftime(self, pattern):
if self.value is NaN:

View File

@@ -32,8 +32,7 @@ def UTC(year, month, date, hours, minutes, seconds, ms): # todo complete this
mili = args[6].to_number() if l > 6 else Js(0)
if not y.is_nan() and 0 <= y.value <= 99:
y = y + Js(1900)
t = TimeClip(MakeDate(MakeDay(y, m, dt), MakeTime(h, mi, sec, mili)))
return PyJsDate(t, prototype=DatePrototype)
return TimeClip(MakeDate(MakeDay(y, m, dt), MakeTime(h, mi, sec, mili)))
@Js
@@ -76,11 +75,12 @@ class PyJsDate(PyJs):
# todo fix this problematic datetime part
def to_local_dt(self):
return datetime.datetime.utcfromtimestamp(
UTCToLocal(self.value) // 1000)
return datetime.datetime(1970, 1, 1) + datetime.timedelta(
seconds=UTCToLocal(self.value) // 1000)
def to_utc_dt(self):
return datetime.datetime.utcfromtimestamp(self.value // 1000)
return datetime.datetime(1970, 1, 1) + datetime.timedelta(
seconds=self.value // 1000)
def local_strftime(self, pattern):
if self.value is NaN:
@@ -332,7 +332,7 @@ class DateProto:
check_date(this)
t = UTCToLocal(this.value)
tim = MakeTime(
HourFromTime(t), MinFromTime(t), SecFromTime(t), ms.to_int())
Js(HourFromTime(t)), Js(MinFromTime(t)), Js(SecFromTime(t)), ms)
u = TimeClip(LocalToUTC(MakeDate(Day(t), tim)))
this.value = u
return u
@@ -341,12 +341,164 @@ class DateProto:
check_date(this)
t = this.value
tim = MakeTime(
HourFromTime(t), MinFromTime(t), SecFromTime(t), ms.to_int())
Js(HourFromTime(t)), Js(MinFromTime(t)), Js(SecFromTime(t)), ms)
u = TimeClip(MakeDate(Day(t), tim))
this.value = u
return u
# todo Complete all setters!
def setSeconds(sec, ms=None):
    """Set the seconds (and optionally milliseconds) of this date, local time.

    :param sec: JS value; ``sec.to_number()`` becomes the new seconds.
    :param ms: optional JS value for the milliseconds. When it is not
        supplied the date's current milliseconds are kept.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = UTCToLocal(this.value)
    s = sec.to_number()
    # The original test was inverted (``if not ms is None``): a supplied value
    # was ignored and an omitted one failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(
        Day(t), MakeTime(Js(HourFromTime(t)), Js(MinFromTime(t)), s, milli))
    u = TimeClip(LocalToUTC(date))
    this.value = u
    return u
def setUTCSeconds(sec, ms=None):
    """Set the seconds (and optionally milliseconds) of this date.

    Works directly on ``this.value`` without any local-time conversion.

    :param sec: JS value; ``sec.to_number()`` becomes the new seconds.
    :param ms: optional JS value for the milliseconds. When it is not
        supplied the date's current milliseconds are kept.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = this.value
    s = sec.to_number()
    # The original test was inverted (``if not ms is None``): a supplied value
    # was ignored and an omitted one failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(
        Day(t), MakeTime(Js(HourFromTime(t)), Js(MinFromTime(t)), s, milli))
    v = TimeClip(date)
    this.value = v
    return v
def setMinutes(min, sec=None, ms=None):
    """Set the minutes (optionally seconds and milliseconds), local time.

    :param min: JS value; ``min.to_number()`` becomes the new minutes.
    :param sec: optional JS value for the seconds; current seconds are kept
        when it is not supplied.
    :param ms: optional JS value for the milliseconds; current milliseconds
        are kept when it is not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = UTCToLocal(this.value)
    m = min.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if sec is None or getattr(sec, 'Class', None) == 'Undefined':
        s = Js(SecFromTime(t))
    else:
        s = sec.to_number()
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(Day(t), MakeTime(Js(HourFromTime(t)), m, s, milli))
    u = TimeClip(LocalToUTC(date))
    this.value = u
    return u
def setUTCMinutes(min, sec=None, ms=None):
    """Set the minutes (optionally seconds and milliseconds) of this date.

    Works directly on ``this.value`` without any local-time conversion.

    :param min: JS value; ``min.to_number()`` becomes the new minutes.
    :param sec: optional JS value for the seconds; current seconds are kept
        when it is not supplied.
    :param ms: optional JS value for the milliseconds; current milliseconds
        are kept when it is not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = this.value
    m = min.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if sec is None or getattr(sec, 'Class', None) == 'Undefined':
        s = Js(SecFromTime(t))
    else:
        s = sec.to_number()
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(Day(t), MakeTime(Js(HourFromTime(t)), m, s, milli))
    v = TimeClip(date)
    this.value = v
    return v
def setHours(hour, min=None, sec=None, ms=None):
    """Set the hours (optionally minutes, seconds, milliseconds), local time.

    :param hour: JS value; ``hour.to_number()`` becomes the new hours.
    :param min: optional JS value for the minutes.
    :param sec: optional JS value for the seconds.
    :param ms: optional JS value for the milliseconds.
        Each optional component keeps its current value when not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = UTCToLocal(this.value)
    h = hour.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if min is None or getattr(min, 'Class', None) == 'Undefined':
        m = Js(MinFromTime(t))
    else:
        m = min.to_number()
    if sec is None or getattr(sec, 'Class', None) == 'Undefined':
        s = Js(SecFromTime(t))
    else:
        s = sec.to_number()
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(Day(t), MakeTime(h, m, s, milli))
    u = TimeClip(LocalToUTC(date))
    this.value = u
    return u
def setUTCHours(hour, min=None, sec=None, ms=None):
    """Set the hours (optionally minutes, seconds, milliseconds) of this date.

    Works directly on ``this.value`` without any local-time conversion.

    :param hour: JS value; ``hour.to_number()`` becomes the new hours.
    :param min: optional JS value for the minutes.
    :param sec: optional JS value for the seconds.
    :param ms: optional JS value for the milliseconds.
        Each optional component keeps its current value when not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = this.value
    h = hour.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if min is None or getattr(min, 'Class', None) == 'Undefined':
        m = Js(MinFromTime(t))
    else:
        m = min.to_number()
    if sec is None or getattr(sec, 'Class', None) == 'Undefined':
        s = Js(SecFromTime(t))
    else:
        s = sec.to_number()
    if ms is None or getattr(ms, 'Class', None) == 'Undefined':
        milli = Js(msFromTime(t))
    else:
        milli = ms.to_number()
    date = MakeDate(Day(t), MakeTime(h, m, s, milli))
    v = TimeClip(date)
    this.value = v
    return v
def setDate(date):
    """Replace the day component of this date, working in local time.

    :param date: JS value; ``date.to_number()`` is passed to ``MakeDay`` as
        the day, together with the date's current year and month.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    local_time = UTCToLocal(this.value)
    day_number = date.to_number()
    day = MakeDay(
        Js(YearFromTime(local_time)), Js(MonthFromTime(local_time)),
        day_number)
    # The time-of-day part is carried over unchanged.
    result = TimeClip(LocalToUTC(MakeDate(day, TimeWithinDay(local_time))))
    this.value = result
    return result
def setUTCDate(date):
    """Replace the day component of this date, without local conversion.

    :param date: JS value; ``date.to_number()`` is passed to ``MakeDay`` as
        the day, together with the date's current year and month.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    time_value = this.value
    day_number = date.to_number()
    day = MakeDay(
        Js(YearFromTime(time_value)), Js(MonthFromTime(time_value)),
        day_number)
    # The time-of-day part is carried over unchanged.
    result = TimeClip(MakeDate(day, TimeWithinDay(time_value)))
    this.value = result
    return result
def setMonth(month, date=None):
    """Set the month (and optionally the day) of this date, local time.

    :param month: JS value; ``month.to_number()`` becomes the new month.
    :param date: optional JS value for the day; the current day is kept when
        it is not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = UTCToLocal(this.value)
    m = month.to_number()
    # The original test was inverted (``if not date is None``): a supplied
    # value was ignored and an omitted one failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if date is None or getattr(date, 'Class', None) == 'Undefined':
        dt = Js(DateFromTime(t))
    else:
        dt = date.to_number()
    newDate = MakeDate(
        MakeDay(Js(YearFromTime(t)), m, dt), TimeWithinDay(t))
    u = TimeClip(LocalToUTC(newDate))
    this.value = u
    return u
def setUTCMonth(month, date=None):
    """Set the month (and optionally the day) of this date.

    Works directly on ``this.value`` without any local-time conversion.

    :param month: JS value; ``month.to_number()`` becomes the new month.
    :param date: optional JS value for the day; the current day is kept when
        it is not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    t = this.value
    m = month.to_number()
    # The original test was inverted (``if not date is None``): a supplied
    # value was ignored and an omitted one failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if date is None or getattr(date, 'Class', None) == 'Undefined':
        dt = Js(DateFromTime(t))
    else:
        dt = date.to_number()
    newDate = MakeDate(
        MakeDay(Js(YearFromTime(t)), m, dt), TimeWithinDay(t))
    v = TimeClip(newDate)
    this.value = v
    return v
def setFullYear(year, month=None, date=None):
    """Set the year (optionally month and day) of this date, local time.

    A NaN time value is treated as time 0 before the components are read.

    :param year: JS value; ``year.to_number()`` becomes the new year.
    :param month: optional JS value for the month.
    :param date: optional JS value for the day.
        Each optional component keeps its current value when not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    if this.value is not NaN:
        t = UTCToLocal(this.value)
    else:
        t = 0
    y = year.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if month is None or getattr(month, 'Class', None) == 'Undefined':
        m = Js(MonthFromTime(t))
    else:
        m = month.to_number()
    if date is None or getattr(date, 'Class', None) == 'Undefined':
        dt = Js(DateFromTime(t))
    else:
        dt = date.to_number()
    newDate = MakeDate(
        MakeDay(y, m, dt), TimeWithinDay(t))
    u = TimeClip(LocalToUTC(newDate))
    this.value = u
    return u
def setUTCFullYear(year, month=None, date=None):
    """Set the year (optionally month and day) of this date.

    Works directly on ``this.value`` without any local-time conversion. A NaN
    time value is treated as time 0 before the components are read.

    :param year: JS value; ``year.to_number()`` becomes the new year.
    :param month: optional JS value for the month.
    :param date: optional JS value for the day.
        Each optional component keeps its current value when not supplied.
    :return: the clipped time value, which is also stored in ``this.value``.
    """
    check_date(this)
    # The original converted through UTCToLocal here although the result is
    # stored without LocalToUTC; the other UTC setters read this.value as is.
    if this.value is not NaN:
        t = this.value
    else:
        t = 0
    y = year.to_number()
    # The original tests were inverted (``if not X is None``): supplied values
    # were ignored and omitted ones failed on ``None.to_number()``.
    # NOTE(review): an omitted JS argument is assumed to arrive either as None
    # or as an object whose ``Class`` is 'Undefined' -- confirm with the
    # function-call wrapper.
    if month is None or getattr(month, 'Class', None) == 'Undefined':
        m = Js(MonthFromTime(t))
    else:
        m = month.to_number()
    if date is None or getattr(date, 'Class', None) == 'Undefined':
        dt = Js(DateFromTime(t))
    else:
        dt = date.to_number()
    newDate = MakeDate(
        MakeDay(y, m, dt), TimeWithinDay(t))
    v = TimeClip(newDate)
    this.value = v
    return v
def toUTCString():
check_date(this)

View File

@@ -16,7 +16,8 @@ CONSTANTS = {
'SQRT1_2': 0.7071067811865476,
'SQRT2': 1.4142135623730951
}
def is_infinity(x):
    """Return True when *x* is positive or negative infinity.

    The previous check, ``x - 1e10 == x``, was also true for any finite float
    too large to register a change of 1e10 (for example 1e300), so huge
    finite numbers were reported as infinite. NaN is not infinite.
    """
    return math.isinf(x)
class MathFunctions:
def abs(this, args):
@@ -65,22 +66,22 @@ class MathFunctions:
def ceil(this, args):
x = get_arg(args, 0)
a = to_number(x)
if a != a: # it must be a nan
return NaN
if not is_finite(x):
return x
return float(math.ceil(a))
def floor(this, args):
x = get_arg(args, 0)
a = to_number(x)
if a != a: # it must be a nan
return NaN
if not is_finite(x):
return x
return float(math.floor(a))
def round(this, args):
x = get_arg(args, 0)
a = to_number(x)
if a != a: # it must be a nan
return NaN
if not is_finite(x):
return x
return float(round(a))
def sin(this, args):

View File

@@ -1,6 +1,6 @@
from ..conversions import *
from ..func_utils import *
from six import unichr
def fromCharCode(this, args):
res = u''

View File

@@ -38,8 +38,8 @@ def DaylightSavingTA(t):
return t
try:
return int(
LOCAL_ZONE.dst(datetime.datetime.utcfromtimestamp(
t // 1000)).seconds) * 1000
LOCAL_ZONE.dst(datetime.datetime(1970, 1, 1) + datetime.timedelta(
seconds=t // 1000)).seconds) * 1000
except:
warnings.warn(
'Invalid datetime date, assumed DST time, may be inaccurate...',

View File

@@ -798,7 +798,7 @@ OP_CODES = {}
g = ''
for g in globals():
try:
if not issubclass(globals()[g], OP_CODE) or g is 'OP_CODE':
if not issubclass(globals()[g], OP_CODE) or g == 'OP_CODE':
continue
except:
continue

View File

@@ -22,6 +22,11 @@ def replacement_template(rep, source, span, npar):
res += '$'
n += 2
continue
elif rep[n + 1] == '&':
# replace with matched string
res += source[span[0]:span[1]]
n += 2
continue
elif rep[n + 1] == '`':
# replace with string that is BEFORE match
res += source[:span[0]]

View File

@@ -1,7 +1,14 @@
from __future__ import print_function
from timeit import timeit
from collections import namedtuple
from array import array
from itertools import izip
try:
#python 2 code
from itertools import izip as zip
except ImportError:
pass
from collections import deque
@@ -47,7 +54,7 @@ t = []
Type = None
try:
print timeit(
print(timeit(
"""
t.append(4)
@@ -56,7 +63,7 @@ t.pop()
""",
"from __main__ import X,Y,namedtuple,array,t,add,Type, izip",
number=1000000)
"from __main__ import X,Y,namedtuple,array,t,add,Type, zip",
number=1000000))
except:
raise

View File

@@ -1,3 +1,4 @@
from __future__ import print_function
from string import ascii_lowercase, digits
##################################
StringName = u'PyJsConstantString%d_'
@@ -305,4 +306,4 @@ if __name__ == '__main__':
''')
t, d = remove_constants(test)
print t, d
print(t, d)

View File

@@ -16,6 +16,8 @@ If case of parsing errors it must return a pos of error.
NOTES:
Strings and other literals are not present so each = means assignment
"""
from __future__ import print_function
from utils import *
from jsparser import *
@@ -80,4 +82,4 @@ def bass_translator(s):
if __name__ == '__main__':
print bass_translator('3.ddsd = 40')
print(bass_translator('3.ddsd = 40'))

View File

@@ -9,6 +9,8 @@ FOR 123
FOR iter
CONTINUE, BREAK, RETURN, LABEL, THROW, TRY, SWITCH
"""
from __future__ import print_function
from utils import *
from jsparser import *
from nodevisitor import exp_translator
@@ -477,4 +479,4 @@ def translate_flow(source):
if __name__ == '__main__':
#print do_dowhile('do {} while(k+f)', 0)[0]
#print 'e: "%s"'%do_expression('++(c?g:h); mj', 0)[0]
print translate_flow('a; yimport test')[0]
print(translate_flow('a; yimport test')[0])

View File

@@ -1,4 +1,6 @@
"""This module removes JS functions from source code"""
from __future__ import print_function
from jsparser import *
from utils import *
@@ -94,5 +96,5 @@ def remove_functions(source, all_inline=False):
if __name__ == '__main__':
print remove_functions(
'5+5 function n (functiona ,functionaj) {dsd s, dsdd}')
print(remove_functions(
'5+5 function n (functiona ,functionaj) {dsd s, dsdd}'))

View File

@@ -45,6 +45,7 @@ TODO
"""
from __future__ import print_function
from utils import *
@@ -64,7 +65,7 @@ OP_METHODS = {
def dbg(source):
try:
with open('C:\Users\Piotrek\Desktop\dbg.py', 'w') as f:
with open(r'C:\Users\Piotrek\Desktop\dbg.py', 'w') as f:
f.write(source)
except:
pass
@@ -77,13 +78,13 @@ def indent(lines, ind=4):
def inject_before_lval(source, lval, code):
if source.count(lval) > 1:
dbg(source)
print
print lval
print()
print(lval)
raise RuntimeError('To many lvals (%s)' % lval)
elif not source.count(lval):
dbg(source)
print
print lval
print()
print(lval)
assert lval not in source
raise RuntimeError('No lval found "%s"' % lval)
end = source.index(lval)

View File

@@ -1,3 +1,5 @@
from __future__ import print_function
from jsparser import *
from utils import *
import re
@@ -557,6 +559,6 @@ if __name__ == '__main__':
#print 'Here', trans('(eee ) . ii [ PyJsMarker ] [ jkj ] ( j , j ) .
# jiji (h , ji , i)(non )( )()()()')
for e in xrange(3):
print exp_translator('jk = kk.ik++')
print(exp_translator('jk = kk.ik++'))
#First line translated with PyJs: PyJsStrictEq(PyJsAdd((Js(100)*Js(50)),Js(30)), Js("5030")), yay!
print exp_translator('delete a.f')
print(exp_translator('delete a.f'))

View File

@@ -1,6 +1,8 @@
""" This module removes all objects/arrays from JS source code and replace them with LVALS.
Also it has s function translating removed object/array to python code.
Use this module just after removing constants. Later move on to removing functions"""
from __future__ import print_function
OBJECT_LVAL = 'PyJsLvalObject%d_'
ARRAY_LVAL = 'PyJsLvalArray%d_'
from utils import *
@@ -180,7 +182,7 @@ def translate_object(obj, lval, obj_count=1, arr_count=1):
try:
key, value = spl
except: #len(spl)> 2
print 'Unusual case ' + repr(e)
print('Unusual case ' + repr(e))
key = spl[0]
value = ':'.join(spl[1:])
key = key.strip()
@@ -293,8 +295,8 @@ if __name__ == '__main__':
#print remove_objects(test)
#print list(bracket_split(' {}'))
print
print remove_arrays(
print()
print(remove_arrays(
'typeof a&&!db.test(a)&&!ib[(bb.exec(a)||["",""], [][[5][5]])[1].toLowerCase()])'
)
print is_object('', ')')
))
print(is_object('', ')'))

View File

@@ -1,3 +1,5 @@
from __future__ import print_function
from flow import translate_flow
from constants import remove_constants, recover_constants
from objects import remove_objects, remove_arrays, translate_object, translate_array, set_func_translator
@@ -148,4 +150,4 @@ if __name__ == '__main__':
#res = translate_js(jq)
res = translate_js(t)
dbg(SANDBOX % indent(res))
print 'Done'
print('Done')

View File

@@ -1,10 +1,16 @@
__all__ = ['require']
import subprocess, os, codecs, glob
from .evaljs import translate_js, DEFAULT_HEADER
from .translators.friendly_nodes import is_valid_py_name
import six
import tempfile
import hashlib
import random
DID_INIT = False
DIRNAME = os.path.dirname(os.path.abspath(__file__))
PY_NODE_MODULES_PATH = os.path.join(DIRNAME, 'py_node_modules')
DIRNAME = tempfile.mkdtemp()
PY_NODE_MODULES_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'py_node_modules')
def _init():
@@ -46,23 +52,33 @@ GET_FROM_GLOBALS_FUNC = '''
'''
def _get_module_py_name(module_name):
return module_name.replace('-', '_')
def _get_module_var_name(module_name):
return _get_module_py_name(module_name).rpartition('/')[-1]
cand = _get_module_py_name(module_name).rpartition('/')[-1]
if not is_valid_py_name(cand):
raise ValueError(
"Invalid Python module name %s (generated from %s). Unsupported/invalid npm module specification?" % (
repr(cand), repr(module_name)))
return cand
def _get_and_translate_npm_module(module_name, include_polyfill=False, update=False):
def _get_and_translate_npm_module(module_name, include_polyfill=False, update=False, maybe_version_str=""):
assert isinstance(module_name, str), 'module_name must be a string!'
py_name = _get_module_py_name(module_name)
module_filename = '%s.py' % py_name
var_name = _get_module_var_name(module_name)
if not os.path.exists(os.path.join(PY_NODE_MODULES_PATH,
module_filename)) or update:
_init()
in_file_name = 'tmp0in439341018923js2py.js'
out_file_name = 'tmp0out439341018923js2py.js'
module_hash = hashlib.sha1(module_name.encode("utf-8")).hexdigest()[:15]
version = random.randrange(10000000000000)
in_file_name = 'in_%s_%d.js' % (module_hash, version)
out_file_name = 'out_%s_%d.js' % (module_hash, version)
code = ADD_TO_GLOBALS_FUNC
if include_polyfill:
code += "\n;require('babel-polyfill');\n"
@@ -74,6 +90,8 @@ def _get_and_translate_npm_module(module_name, include_polyfill=False, update=Fa
f.write(code.encode('utf-8') if six.PY3 else code)
pkg_name = module_name.partition('/')[0]
if maybe_version_str:
pkg_name += '@' + maybe_version_str
# make sure the module is installed
assert subprocess.call(
'cd %s;npm install %s' % (repr(DIRNAME), pkg_name),
@@ -93,7 +111,7 @@ def _get_and_translate_npm_module(module_name, include_polyfill=False, update=Fa
with codecs.open(os.path.join(DIRNAME, out_file_name), "r",
"utf-8") as f:
js_code = f.read()
os.remove(os.path.join(DIRNAME, out_file_name))
print("Bundled JS library dumped at: %s" % os.path.join(DIRNAME, out_file_name))
if len(js_code) < 50:
raise RuntimeError("Candidate JS bundle too short - likely browserify issue.")
js_code += GET_FROM_GLOBALS_FUNC
@@ -117,21 +135,25 @@ def _get_and_translate_npm_module(module_name, include_polyfill=False, update=Fa
return py_code
def require(module_name, include_polyfill=False, update=False, context=None):
def require(module_name, include_polyfill=True, update=False, context=None):
"""
Installs the provided npm module, exports a js bundle via browserify, converts to ECMA 5.1 via babel and
finally translates the generated JS bundle to Python via Js2Py.
Returns a pure python object that behaves like the installed module. Nice!
:param module_name: Name of the npm module to require. For example 'esprima'.
:param module_name: Name of the npm module to require. For example 'esprima'. Supports specific versions via @
specification. Eg: 'crypto-js@3.3'.
:param include_polyfill: Whether the babel-polyfill should be included as part of the translation. May be needed
for some modules that use unsupported features.
for some modules that use unsupported features of JS6 such as Map or typed arrays.
:param update: Whether to force update the translation. Otherwise uses a cached version if exists.
:param context: Optional context in which the translated module should be executed in. If provided, the
header (js2py imports) will be skipped as it is assumed that the context already has all the necessary imports.
:return: The JsObjectWrapper containing the translated module object. Can be used like a standard python object.
"""
py_code = _get_and_translate_npm_module(module_name, include_polyfill=include_polyfill, update=update)
module_name, maybe_version = (module_name+"@@@").split('@')[:2]
py_code = _get_and_translate_npm_module(module_name, include_polyfill=include_polyfill, update=update,
maybe_version_str=maybe_version)
# this is a bit hacky but we need to strip the default header from the generated code...
if context is not None:
if not py_code.startswith(DEFAULT_HEADER):
@@ -141,5 +163,5 @@ def require(module_name, include_polyfill=False, update=False, context=None):
assert py_code.startswith(DEFAULT_HEADER), "Unexpected header."
py_code = py_code[len(DEFAULT_HEADER):]
context = {} if context is None else context
exec (py_code, context)
exec(py_code, context)
return context['var'][_get_module_var_name(module_name)].to_py()

View File

@@ -17,6 +17,11 @@ def replacement_template(rep, source, span, npar):
res += '$'
n += 2
continue
elif rep[n + 1] == '&':
# replace with matched string
res += source[span[0]:span[1]]
n += 2
continue
elif rep[n + 1] == '`':
# replace with string that is BEFORE match
res += source[:span[0]]

View File

@@ -14,26 +14,36 @@ if six.PY3:
LINE_LEN_LIMIT = 400 # 200 # or any other value - the larger the smaller probability of errors :)
class ForController:
class LoopController:
def __init__(self):
self.inside = [False]
self.update = ''
self.update = [""]
self.label_to_update_idx = {}
def enter_for(self, update):
self.inside.append(True)
self.update = update
def enter(self, update=""):
    """Push *update* (empty string by default) onto the ``update`` stack."""
    self.update.extend((update,))
def leave_for(self):
self.inside.pop()
def leave(self):
    """Drop the most recently pushed entry from the ``update`` stack."""
    del self.update[-1]
def get_update(self, label=None):
    """Return the update code for the innermost loop or for a labelled one.

    :param label: label name, or None to use the top of the ``update`` stack.
    :return: the entry of ``self.update`` selected by the label's registered
        index, or the last entry when no label is given.
    :raises SyntaxError: if the label was never registered, or its registered
        index lies beyond the current ``update`` stack.
    """
    if label is None:
        return self.update[-1]
    registered = self.label_to_update_idx
    if label not in registered:
        raise SyntaxError("Undefined label %s" % label)
    position = registered[label]
    if position >= len(self.update):
        raise SyntaxError("%s is not a iteration statement label?" % label)
    return self.update[position]
def register_label(self, label):
    """Record the ``update`` stack depth at which *label* was declared.

    The stored index is the position the next pushed loop entry will occupy
    in ``self.update``.

    :param label: label name to register.
    :raises SyntaxError: if the label is already registered.
    """
    if label in self.label_to_update_idx:
        # The message was previously never formatted, so the literal "%s"
        # was shown instead of the label name.
        raise SyntaxError("label %s already used" % label)
    self.label_to_update_idx[label] = len(self.update)
def deregister_label(self, label):
    """Forget *label*; raises KeyError if it was not registered."""
    self.label_to_update_idx.pop(label)
def enter_other(self):
self.inside.append(False)
def leave_other(self):
self.inside.pop()
def is_inside(self):
return self.inside[-1]
class InlineStack:
@@ -86,9 +96,10 @@ class ContextStack:
def clean_stacks():
global Context, inline_stack
global Context, inline_stack, loop_controller
Context = ContextStack()
inline_stack = InlineStack()
loop_controller = LoopController()
def to_key(literal_or_identifier):
@@ -108,6 +119,13 @@ def to_key(literal_or_identifier):
else:
return unicode(k)
def is_iteration_statement(cand):
    """Tell whether *cand* is a syntax-tree node for a loop statement.

    Anything that is not a dict (for example a list of several statements)
    is rejected; a dict qualifies when its "type" entry names one of the
    four loop node types.
    """
    loop_types = {"ForStatement", "ForInStatement", "WhileStatement", "DoWhileStatement"}
    return isinstance(cand, dict) and cand.get("type", "?") in loop_types
def trans(ele, standard=False):
"""Translates esprima syntax tree to python by delegating to appropriate translating node"""
@@ -367,9 +385,14 @@ def BreakStatement(type, label):
def ContinueStatement(type, label):
if label:
return 'raise %s("Continued")\n' % (get_continue_label(label['name']))
maybe_update_expr = loop_controller.get_update(label=label['name'])
continue_stmt = 'raise %s("Continued")\n' % (get_continue_label(label['name']))
else:
return 'continue\n'
maybe_update_expr = loop_controller.get_update()
continue_stmt = "continue\n"
if maybe_update_expr:
return "# continue update\n%s\n%s" % (maybe_update_expr, continue_stmt)
return continue_stmt
def ReturnStatement(type, argument):
@@ -386,24 +409,28 @@ def DebuggerStatement(type):
def DoWhileStatement(type, body, test):
inside = trans(body) + 'if not %s:\n' % trans(test) + indent('break\n')
loop_controller.enter()
body_code = trans(body)
loop_controller.leave()
inside = body_code + 'if not %s:\n' % trans(test) + indent('break\n')
result = 'while 1:\n' + indent(inside)
return result
def ForStatement(type, init, test, update, body):
update = indent(trans(update)) if update else ''
update = trans(update) if update else ''
init = trans(init) if init else ''
if not init.endswith('\n'):
init += '\n'
test = trans(test) if test else '1'
loop_controller.enter(update)
if not update:
result = '#for JS loop\n%swhile %s:\n%s%s\n' % (
init, test, indent(trans(body)), update)
else:
result = '#for JS loop\n%swhile %s:\n' % (init, test)
body = 'try:\n%sfinally:\n %s\n' % (indent(trans(body)), update)
result += indent(body)
result += indent("%s# update\n%s\n" % (trans(body), update))
loop_controller.leave()
return result
@@ -422,7 +449,9 @@ def ForInStatement(type, left, right, body, each):
name = left['name']
else:
raise RuntimeError('Unusual ForIn loop')
loop_controller.enter()
res += indent('var.put(%s, PyJsTemp)\n' % repr(name) + trans(body))
loop_controller.leave()
return res
@@ -438,20 +467,23 @@ def IfStatement(type, test, consequent, alternate):
def LabeledStatement(type, label, body):
    """Emit Python source for a JavaScript labelled statement.

    The translated body is wrapped in ``try/except`` on the label's break
    exception class. When the body is a loop statement, the loop body is
    additionally wrapped in ``try/except`` on the continue exception class.
    The exception class definitions are emitted ahead of the code.
    """
    # todo consider using smarter approach!
    label_name = label['name']
    loop_controller.register_label(label_name)
    inside = trans(body)
    loop_controller.deregister_label(label_name)
    defs = ''
    if is_iteration_statement(body) and inside.startswith(
            ('while ', 'for ', '#for')):
        # we have to add continue label as well...
        # 3 or 1 since #for loop type has more lines before real for.
        sep = 1 if not inside.startswith('#for') else 3
        cont_label = get_continue_label(label_name)
        temp = inside.split('\n')
        injected = 'try:\n' + '\n'.join(temp[sep:])
        injected += 'except %s:\n pass\n' % cont_label
        inside = '\n'.join(temp[:sep]) + '\n' + indent(injected)
        defs += 'class %s(Exception): pass\n' % cont_label
    break_label = get_break_label(label_name)
    inside = 'try:\n%sexcept %s:\n pass\n' % (indent(inside), break_label)
    defs += 'class %s(Exception): pass\n' % break_label
    return defs + inside
@@ -546,7 +578,11 @@ def VariableDeclaration(type, declarations, kind):
def WhileStatement(type, test, body):
    """Emit Python source for a JavaScript ``while (test)`` loop.

    The test is translated first; the body is translated exactly once,
    between ``loop_controller.enter()`` and ``leave()``.
    """
    test_code = trans(test)
    loop_controller.enter()
    body_code = trans(body)
    loop_controller.leave()
    result = 'while %s:\n' % test_code + indent(body_code)
    return result

View File

@@ -55,16 +55,19 @@ def dbg(x):
"""does nothing, legacy dummy function"""
return ''
# Another way of doing that would be with my auto esprima translation but its much slower:
# parsed = esprima.parse(js).to_dict()
def pyjsparser_parse_fn(code):
    """Parse JavaScript source with a fresh ``pyjsparser.PyJsParser``.

    Returns whatever ``PyJsParser.parse`` returns for ``code``.
    """
    return pyjsparser.PyJsParser().parse(code)
def translate_js(js, HEADER=DEFAULT_HEADER, use_compilation_plan=False, parse_fn=pyjsparser_parse_fn):
    """js has to be a javascript source code.
       returns equivalent python code.

       ``parse_fn`` turns the source into the syntax tree that is handed to
       ``translating_nodes.trans``; it defaults to the pyjsparser-based
       parser. When ``use_compilation_plan`` is set and the source contains
       neither ``//`` nor ``/*``, translation is delegated to
       ``translate_js_with_compilation_plan`` instead."""
    if use_compilation_plan and '//' not in js and '/*' not in js:
        return translate_js_with_compilation_plan(js, HEADER=HEADER)
    parsed = parse_fn(js)  # js to syntax tree
    translating_nodes.clean_stacks()
    return HEADER + translating_nodes.trans(
        parsed)  # syntax tree to python code

View File

@@ -26,17 +26,19 @@ def fix_js_args(func):
return func
code = append_arguments(six.get_function_code(func), ('this', 'arguments'))
return types.FunctionType(
result = types.FunctionType(
code,
six.get_function_globals(func),
func.__name__,
closure=six.get_function_closure(func))
return result
def append_arguments(code_obj, new_locals):
co_varnames = code_obj.co_varnames # Old locals
co_names = code_obj.co_names # Old globals
co_names += tuple(e for e in new_locals if e not in co_names)
new_args = tuple(e for e in new_locals if e not in co_names)
co_names += new_args
co_argcount = code_obj.co_argcount # Argument count
co_code = code_obj.co_code # The actual bytecode as a string
@@ -76,26 +78,51 @@ def append_arguments(code_obj, new_locals):
names_to_varnames = dict(
(co_names.index(name), varnames.index(name)) for name in new_locals)
is_new_bytecode = sys.version_info >= (3, 11)
# Now we modify the actual bytecode
modified = []
drop_future_cache = False
for inst in instructions(code_obj):
if is_new_bytecode and inst.opname == "CACHE":
assert inst.arg == 0
if not drop_future_cache:
modified.extend(write_instruction(inst.opcode, inst.arg))
else:
# We need to inject NOOP to not break jumps :(
modified.extend(write_instruction(dis.opmap["NOP"], 0))
continue
op, arg = inst.opcode, inst.arg
# If the instruction is a LOAD_GLOBAL, we have to check to see if
# it's one of the globals that we are replacing. Either way,
# update its arg using the appropriate dict.
drop_future_cache = False
if inst.opcode == LOAD_GLOBAL:
if inst.arg in names_to_varnames:
idx = inst.arg
if is_new_bytecode:
idx = idx // 2
if idx in names_to_varnames:
op = LOAD_FAST
arg = names_to_varnames[inst.arg]
elif inst.arg in name_translations:
arg = name_translations[inst.arg]
arg = names_to_varnames[idx]
# Cache is not present after LOAD_FAST and needs to be removed.
drop_future_cache = True
elif idx in name_translations:
tgt = name_translations[idx]
if is_new_bytecode:
tgt = 2*tgt + (inst.arg % 2)
arg = tgt
else:
raise ValueError("a name was lost in translation")
raise(ValueError("a name was lost in translation last instruction %s" % str(inst)))
# If it accesses co_varnames or co_names then update its argument.
elif inst.opcode in opcode.haslocal:
arg = varname_translations[inst.arg]
elif inst.opcode in opcode.hasname:
# for example STORE_GLOBAL
arg = name_translations[inst.arg]
elif is_new_bytecode and inst.opcode in opcode.hasfree:
# Python 3.11+ adds refs at the end (after locals), for whatever reason...
if inst.argval not in code_obj.co_varnames[:code_obj.co_argcount]: # we do not need to remap existing arguments, they are not shifted by new ones.
arg = inst.arg + len(new_locals)
modified.extend(write_instruction(op, arg))
if six.PY2:
code = ''.join(modified)
@@ -113,23 +140,26 @@ def append_arguments(code_obj, new_locals):
code_obj.co_filename, code_obj.co_name,
code_obj.co_firstlineno, code_obj.co_lnotab,
code_obj.co_freevars, code_obj.co_cellvars)
# Done modifying codestring - make the code object
if hasattr(code_obj, "replace"):
# Python 3.8+
return code_obj.replace(
code_obj = code_obj.replace(
co_argcount=co_argcount + new_locals_len,
co_nlocals=code_obj.co_nlocals + new_locals_len,
co_code=code,
co_names=names,
co_varnames=varnames)
return code_obj
else:
return types.CodeType(*args)
def instructions(code_obj):
# easy for python 3.4+
if sys.version_info >= (3, 4):
def instructions(code_obj, show_cache=True):
if sys.version_info >= (3, 11):
# Python 3.11 introduced "cache instructions", hidden by default.
for inst in dis.Bytecode(code_obj, show_caches=show_cache):
yield inst
elif sys.version_info >= (3, 4): # easy for python 3.4+
for inst in dis.Bytecode(code_obj):
yield inst
else:
@@ -171,7 +201,7 @@ def write_instruction(op, arg):
chr((arg >> 8) & 255)
]
else:
raise ValueError("Invalid oparg: {0} is too large".format(oparg))
raise ValueError("Invalid oparg: {0} is too large".format(arg))
else: # python 3.6+ uses wordcode instead of bytecode and they already supply all the EXTENDEND_ARG ops :)
if arg is None:
return [chr(op), 0]
@@ -191,6 +221,7 @@ def write_instruction(op, arg):
# raise ValueError("Invalid oparg: {0} is too large".format(oparg))
def check(code_obj):
old_bytecode = code_obj.co_code
insts = list(instructions(code_obj))
@@ -221,24 +252,99 @@ def check(code_obj):
'Your python version made changes to the bytecode')
def signature(func):
    """Collect the code-object attributes used to compare two functions.

    Returns a tuple of selected ``co_*`` attributes of ``func``'s code
    object (``co_nlocals`` appears twice, as in the comparison it feeds).
    """
    code = six.get_function_code(func)
    attribute_names = (
        'co_nlocals', 'co_argcount', 'co_nlocals', 'co_stacksize',
        'co_flags', 'co_names', 'co_varnames',
        'co_filename',
        'co_freevars', 'co_cellvars',
    )
    return tuple(getattr(code, attr) for attr in attribute_names)
check(six.get_function_code(check))
def compare_func(fake_func, gt_func):
    """Assert that a rewritten function matches a hand-written reference.

    Compares the ``signature`` tuples of both functions, then walks their
    instructions (cache entries hidden). ``NOP`` instructions in
    ``fake_func`` are skipped; every other instruction must match the
    reference at the same position in opcode and jump-target flag, and for
    non-jump instructions also in ``arg`` and ``argval`` (``argval`` may
    differ for ``LOAD_CONST``). Progress is printed along the way.
    """
    fake_sig = signature(fake_func)
    gt_sig = signature(gt_func)
    print(fake_sig)
    print(gt_sig)
    assert fake_sig == gt_sig
    fake_ins = list(instructions(six.get_function_code(fake_func), show_cache=False))
    real_ins = list(instructions(six.get_function_code(gt_func), show_cache=False))
    offset = 0
    pos = 0
    for fake in fake_ins:
        if fake.opname == "NOP":
            offset += 1  # ignore NOPs that are inserted in place of old cache.
            continue
        real = real_ins[pos]
        print("POS %d OFFSET: %d FAKE VS REAL" % (pos, offset))
        print(fake)
        print(real)
        assert fake.opcode == real.opcode
        is_jump = fake.opcode in dis.hasjabs or fake.opcode in dis.hasjrel
        if not is_jump:
            assert fake.arg == real.arg
            assert fake.argval == real.argval or fake.opname in ["LOAD_CONST"]
        assert fake.is_jump_target == real.is_jump_target
        pos += 1
    assert pos == len(real_ins), (pos, len(real_ins))
    print("DONE, looks good.")
# Self-test, run only when this module is executed directly.
if __name__ == '__main__':
x = 'Wrong'
dick = 3000
import faulthandler
# Part 1: `func` reads x, y and z as globals; append_arguments is asked to
# turn them into trailing positional arguments instead.
def func(a):
print(x, y, z, a)
print(dick)
d = (x, )
for e in (e for e in x):
print(e)
return x, y, z
faulthandler.enable()
func2 = types.FunctionType(
append_arguments(six.get_function_code(func), ('x', 'y', 'z')),
six.get_function_globals(func),
func.__name__,
closure=six.get_function_closure(func))
# The rewritten function must return the x, y, z values it was passed.
args = (2, 2, 3, 4), 3, 4
assert func2(1, *args) == args
# Part 2: `func` uses `this` as a free name, while `func_gt` is the same
# body with `this` and `arguments` written as explicit parameters.
# The two bodies must stay identical, because compare_func checks the
# instructions of fix_js_args(func) against those of func_gt.
def func(cmpfn):
if not this.Class in ('Array', 'Arguments'):
return this.to_object() # do nothing
arr = []
for i in xrange(len(this)):
arr.append(this.get(six.text_type(i)))
if not arr:
return this
if not cmpfn.is_callable():
cmpfn = None
cmp = lambda a, b: sort_compare(a, b, cmpfn)
if six.PY3:
key = functools.cmp_to_key(cmp)
arr.sort(key=key)
else:
arr.sort(cmp=cmp)
for i in xrange(len(arr)):
this.put(six.text_type(i), arr[i])
return this
def func_gt(cmpfn, this, arguments):
if not this.Class in ('Array', 'Arguments'):
return this.to_object() # do nothing
arr = []
for i in xrange(len(this)):
arr.append(this.get(six.text_type(i)))
if not arr:
return this
if not cmpfn.is_callable():
cmpfn = None
cmp = lambda a, b: sort_compare(a, b, cmpfn)
if six.PY3:
key = functools.cmp_to_key(cmp)
arr.sort(key=key)
else:
arr.sort(cmp=cmp)
for i in xrange(len(arr)):
this.put(six.text_type(i), arr[i])
return this
func2 = fix_js_args(func)
compare_func(func2, func_gt)

546
lib/xmltodict.py Normal file
View File

@@ -0,0 +1,546 @@
#!/usr/bin/env python
"Makes working with XML feel like you are working with JSON"
try:
from defusedexpat import pyexpat as expat
except ImportError:
from xml.parsers import expat
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import AttributesImpl
try: # pragma no cover
from cStringIO import StringIO
except ImportError: # pragma no cover
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
_dict = dict
import platform
if tuple(map(int, platform.python_version_tuple()[:2])) < (3, 7):
from collections import OrderedDict as _dict
from inspect import isgenerator
try: # pragma no cover
_basestring = basestring
except NameError: # pragma no cover
_basestring = str
try: # pragma no cover
_unicode = unicode
except NameError: # pragma no cover
_unicode = str
__author__ = 'Martin Blech'
__version__ = '0.13.0'
__license__ = 'MIT'
class ParsingInterrupted(Exception):
    """Raised to stop parsing when the item callback returns a false value."""
class _DictSAXHandler(object):
def __init__(self,
item_depth=0,
item_callback=lambda *args: True,
xml_attribs=True,
attr_prefix='@',
cdata_key='#text',
force_cdata=False,
cdata_separator='',
postprocessor=None,
dict_constructor=_dict,
strip_whitespace=True,
namespace_separator=':',
namespaces=None,
force_list=None,
comment_key='#comment'):
self.path = []
self.stack = []
self.data = []
self.item = None
self.item_depth = item_depth
self.xml_attribs = xml_attribs
self.item_callback = item_callback
self.attr_prefix = attr_prefix
self.cdata_key = cdata_key
self.force_cdata = force_cdata
self.cdata_separator = cdata_separator
self.postprocessor = postprocessor
self.dict_constructor = dict_constructor
self.strip_whitespace = strip_whitespace
self.namespace_separator = namespace_separator
self.namespaces = namespaces
self.namespace_declarations = dict_constructor()
self.force_list = force_list
self.comment_key = comment_key
def _build_name(self, full_name):
if self.namespaces is None:
return full_name
i = full_name.rfind(self.namespace_separator)
if i == -1:
return full_name
namespace, name = full_name[:i], full_name[i+1:]
try:
short_namespace = self.namespaces[namespace]
except KeyError:
short_namespace = namespace
if not short_namespace:
return name
else:
return self.namespace_separator.join((short_namespace, name))
def _attrs_to_dict(self, attrs):
if isinstance(attrs, dict):
return attrs
return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))
def startNamespaceDecl(self, prefix, uri):
self.namespace_declarations[prefix or ''] = uri
def startElement(self, full_name, attrs):
name = self._build_name(full_name)
attrs = self._attrs_to_dict(attrs)
if attrs and self.namespace_declarations:
attrs['xmlns'] = self.namespace_declarations
self.namespace_declarations = self.dict_constructor()
self.path.append((name, attrs or None))
if len(self.path) >= self.item_depth:
self.stack.append((self.item, self.data))
if self.xml_attribs:
attr_entries = []
for key, value in attrs.items():
key = self.attr_prefix+self._build_name(key)
if self.postprocessor:
entry = self.postprocessor(self.path, key, value)
else:
entry = (key, value)
if entry:
attr_entries.append(entry)
attrs = self.dict_constructor(attr_entries)
else:
attrs = None
self.item = attrs or None
self.data = []
def endElement(self, full_name):
name = self._build_name(full_name)
if len(self.path) == self.item_depth:
item = self.item
if item is None:
item = (None if not self.data
else self.cdata_separator.join(self.data))
should_continue = self.item_callback(self.path, item)
if not should_continue:
raise ParsingInterrupted()
if self.stack:
data = (None if not self.data
else self.cdata_separator.join(self.data))
item = self.item
self.item, self.data = self.stack.pop()
if self.strip_whitespace and data and item:
data = data.strip() or None
if data and self.force_cdata and item is None:
item = self.dict_constructor()
if item is not None:
if data:
self.push_data(item, self.cdata_key, data)
self.item = self.push_data(self.item, name, item)
else:
self.item = self.push_data(self.item, name, data)
else:
self.item = None
self.data = []
self.path.pop()
def characters(self, data):
if not self.data:
self.data = [data]
else:
self.data.append(data)
def comments(self, data):
if self.strip_whitespace:
data = data.strip()
self.item = self.push_data(self.item, self.comment_key, data)
def push_data(self, item, key, data):
if self.postprocessor is not None:
result = self.postprocessor(self.path, key, data)
if result is None:
return item
key, data = result
if item is None:
item = self.dict_constructor()
try:
value = item[key]
if isinstance(value, list):
value.append(data)
else:
item[key] = [value, data]
except KeyError:
if self._should_force_list(key, data):
item[key] = [data]
else:
item[key] = data
return item
def _should_force_list(self, key, value):
if not self.force_list:
return False
if isinstance(self.force_list, bool):
return self.force_list
try:
return key in self.force_list
except TypeError:
return self.force_list(self.path[:-1], key, value)
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', disable_entities=True, process_comments=False, **kwargs):
    """Parse the given XML input and convert it into a dictionary.

    `xml_input` can be a string, a file-like object (anything with a
    `read` attribute) or a generator of chunks. Text input is encoded with
    `encoding` (UTF-8 when none is given) before it is fed to the parser.

    Remaining keyword arguments are forwarded to the handler class; the
    most commonly used ones are:

    `xml_attribs`
        When true (the default) attributes are stored next to the child
        elements, prefixed with `attr_prefix` (`'@'`). When false they are
        ignored.
    `item_depth`, `item_callback`
        With `item_depth` 0 (the default) the dictionary of the root
        element is returned. Otherwise `item_callback(path, item)` is
        called for every element closed at that depth; `path` is the list
        of `(name, attribs)` pairs leading to the item. A false-ish return
        value stops parsing with :class:`ParsingInterrupted`.
    `postprocessor`
        A callable `postprocessor(path, key, value)` returning a new
        `(key, value)` pair, or `None` to drop the entry.
    `force_list`
        `True`, a container of keys, or a callable taking
        `(path, key, value)`; matching keys get a list value even for a
        single child.

    Simple example::

        >>> xmltodict.parse('<a prop="x"><b>1</b><b>2</b></a>')
        {'a': {'@prop': 'x', 'b': ['1', '2']}}

    Postprocessor example::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...                 postprocessor=postprocessor)
        {'a': {'b:int': [1, 2], 'b': 'x'}}

    An alternative expat implementation (such as `defusedexpat.pyexpat`)
    can be supplied through the `expat` parameter.

    If `process_namespaces` is true the parser is created with
    `namespace_separator`; otherwise namespace processing is off.

    If `process_comments` is true, comments are stored on the enclosing
    element under `comment_key` (default `'#comment'`), e.g.
    `<a><!-- note --><b>1</b></a>` yields
    `{'a': {'#comment': 'note', 'b': '1'}}`.

    If `disable_entities` is true (the default), handlers are installed so
    that entities are not expanded.
    """
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)
    if isinstance(xml_input, _unicode):
        encoding = encoding or 'utf-8'
        xml_input = xml_input.encode(encoding)

    # Namespace processing is enabled only by passing a separator to expat.
    parser_separator = namespace_separator if process_namespaces else None
    parser = expat.ParserCreate(encoding, parser_separator)
    try:
        parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass

    parser.StartNamespaceDeclHandler = handler.startNamespaceDecl
    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    parser.CharacterDataHandler = handler.characters
    if process_comments:
        parser.CommentHandler = handler.comments
    parser.buffer_text = True

    if disable_entities:
        try:
            # Attempt to disable DTD in Jython's expat parser (Xerces-J).
            feature = "http://apache.org/xml/features/disallow-doctype-decl"
            parser._reader.setFeature(feature, True)
        except AttributeError:
            # For CPython / expat parser.
            # Anything not handled ends up here and entities aren't expanded.
            parser.DefaultHandler = lambda x: None
            # Expects an integer return; zero means failure -> expat.ExpatError.
            parser.ExternalEntityRefHandler = lambda *x: 1

    if hasattr(xml_input, 'read'):
        parser.ParseFile(xml_input)
    elif isgenerator(xml_input):
        for chunk in xml_input:
            parser.Parse(chunk, False)
        # Signal the end of the document after the last chunk.
        parser.Parse(b'', True)
    else:
        parser.Parse(xml_input, True)
    return handler.item
def _process_namespace(name, namespaces, ns_sep=':', attr_prefix='@'):
if not namespaces:
return name
try:
ns, name = name.rsplit(ns_sep, 1)
except ValueError:
pass
else:
ns_res = namespaces.get(ns.strip(attr_prefix))
name = '{}{}{}{}'.format(
attr_prefix if ns.startswith(attr_prefix) else '',
ns_res, ns_sep, name) if ns_res else name
return name
def _emit(key, value, content_handler,
          attr_prefix='@',
          cdata_key='#text',
          depth=0,
          preprocessor=None,
          pretty=False,
          newl='\n',
          indent='\t',
          namespace_separator=':',
          namespaces=None,
          full_document=True,
          expand_iter=None):
    """Write ``key``/``value`` as XML element(s) to ``content_handler``.

    ``value`` may be a single value or an iterable of values; each entry
    becomes one element named ``key``. Within a dict entry, keys starting
    with ``attr_prefix`` become attributes, ``cdata_key`` holds the text
    content and all other keys are emitted recursively as children.

    ``preprocessor(key, value)`` may rewrite the pair or return ``None`` to
    skip it. ``pretty``, ``newl`` and ``indent`` control pretty-printing;
    an integer ``indent`` means that many spaces. ``expand_iter`` names the
    child key used to expand nested non-string iterables.

    Raises ``ValueError`` when ``full_document`` is true and more than one
    element would be emitted at depth 0.
    """
    key = _process_namespace(key, namespaces, namespace_separator, attr_prefix)
    if preprocessor is not None:
        result = preprocessor(key, value)
        if result is None:
            return
        key, value = result
    if (not hasattr(value, '__iter__')
            or isinstance(value, _basestring)
            or isinstance(value, dict)):
        value = [value]
    # Loop-invariant: normalise a numeric indent to a run of spaces once.
    if isinstance(indent, int):
        indent = ' ' * indent
    # Namespace declarations are stored under the attribute prefix + 'xmlns'.
    xmlns_key = attr_prefix + 'xmlns'
    for index, v in enumerate(value):
        if full_document and depth == 0 and index > 0:
            raise ValueError('document with multiple roots')
        if v is None:
            v = _dict()
        elif isinstance(v, bool):
            if v:
                v = _unicode('true')
            else:
                v = _unicode('false')
        elif not isinstance(v, dict):
            if expand_iter and hasattr(v, '__iter__') and not isinstance(v, _basestring):
                v = _dict(((expand_iter, v),))
            else:
                v = _unicode(v)
        if isinstance(v, _basestring):
            v = _dict(((cdata_key, v),))
        cdata = None
        attrs = _dict()
        children = []
        for ik, iv in v.items():
            if ik == cdata_key:
                cdata = iv
                continue
            if ik.startswith(attr_prefix):
                ik = _process_namespace(ik, namespaces, namespace_separator,
                                        attr_prefix)
                if ik == xmlns_key and isinstance(iv, dict):
                    # Distinct names here: the outer loop variable must not
                    # be shadowed.
                    for ns_prefix, ns_uri in iv.items():
                        attr = 'xmlns{}'.format(
                            ':{}'.format(ns_prefix) if ns_prefix else '')
                        attrs[attr] = _unicode(ns_uri)
                    continue
                if not isinstance(iv, _unicode):
                    iv = _unicode(iv)
                attrs[ik[len(attr_prefix):]] = iv
                continue
            children.append((ik, iv))
        if pretty:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.startElement(key, AttributesImpl(attrs))
        if pretty and children:
            content_handler.ignorableWhitespace(newl)
        for child_key, child_value in children:
            _emit(child_key, child_value, content_handler,
                  attr_prefix, cdata_key, depth+1, preprocessor,
                  pretty, newl, indent, namespaces=namespaces,
                  namespace_separator=namespace_separator,
                  expand_iter=expand_iter)
        if cdata is not None:
            # The content handler only accepts text, so numbers and other
            # non-string values are converted first.
            if not isinstance(cdata, _basestring):
                cdata = _unicode(cdata)
            content_handler.characters(cdata)
        if pretty and children:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.endElement(key)
        if pretty and depth:
            content_handler.ignorableWhitespace(newl)
def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
            short_empty_elements=False,
            **kwargs):
    """Emit an XML document for the given `input_dict` (reverse of `parse`).

    When `output` (a file-like object) is given, the XML is written to it
    and nothing is returned; otherwise the document is returned as a
    string.

    Keys starting with `attr_prefix` (default `'@'`) become attributes and
    the `cdata_key` entry (default `'#text'`) becomes character data.
    `pretty=True` turns on pretty-printing, with `newl` (default `'\n'`)
    and `indent` (default `'\t'`) as line terminator and indentation.
    All extra keyword arguments are passed on to the emitter.

    Raises `ValueError` if `full_document` is true and `input_dict` does
    not contain exactly one root.
    """
    if full_document and len(input_dict) != 1:
        raise ValueError('Document must have exactly one root.')
    must_return = output is None
    if must_return:
        output = StringIO()
    # The third positional argument is passed only when requested.
    generator_args = (output, encoding)
    if short_empty_elements:
        generator_args += (True,)
    content_handler = XMLGenerator(*generator_args)
    if full_document:
        content_handler.startDocument()
    for key, value in input_dict.items():
        _emit(key, value, content_handler, full_document=full_document,
              **kwargs)
    if full_document:
        content_handler.endDocument()
    if not must_return:
        return None
    value = output.getvalue()
    try:  # pragma no cover
        value = value.decode(encoding)
    except AttributeError:  # pragma no cover
        pass
    return value
if __name__ == '__main__':  # pragma: no cover
    # Command-line mode: read XML from stdin and marshal each item found at
    # the depth given as the only argument to stdout.
    import marshal
    import sys

    try:
        # Prefer the binary streams where the interpreter offers them.
        stdin = sys.stdin.buffer
        stdout = sys.stdout.buffer
    except AttributeError:
        stdin = sys.stdin
        stdout = sys.stdout

    (item_depth,) = sys.argv[1:]
    item_depth = int(item_depth)

    def handle_item(path, item):
        """Dump one (path, item) pair and ask the parser to continue."""
        marshal.dump((path, item), stdout)
        return True

    try:
        root = parse(stdin,
                     item_depth=item_depth,
                     item_callback=handle_item,
                     dict_constructor=dict)
        if item_depth == 0:
            # Depth 0 never triggers the callback, so emit the root here.
            handle_item([], root)
    except KeyboardInterrupt:
        pass