# -*- coding: utf-8 -*-
# Copyright JS Foundation and other contributors, https://js.foundation/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import absolute_import, unicode_literals

import re

from .objects import Object
from .compat import xrange, unicode, uchr, uord
from .character import Character, HEX_CONV, OCTAL_CONV
from .messages import Messages
from .token import Token


def hexValue(ch):
    """Return the ``HEX_CONV`` table entry for the character ``ch``."""
    return HEX_CONV[ch]


def octalValue(ch):
    """Return the ``OCTAL_CONV`` table entry for the character ``ch``."""
    return OCTAL_CONV[ch]


class RegExp(Object):
    """Plain record holding a regular expression's pattern and flags."""

    def __init__(self, pattern=None, flags=None):
        self.pattern = pattern
        self.flags = flags


class Position(Object):
    """Plain record holding a line, a column and an offset."""

    def __init__(self, line=None, column=None, offset=None):
        self.line = line
        self.column = column
        self.offset = offset


class SourceLocation(Object):
    """Plain record holding a start position, an end position and a source."""

    def __init__(self, start=None, end=None, source=None):
        self.start = start
        self.end = end
        self.source = source


class Comment(Object):
    """Record describing one comment found by the scanner.

    As filled in by ``Scanner.skipSingleLineComment`` and
    ``Scanner.skipMultiLineComment``:

    * ``multiLine`` is False for single-line and True for block comments;
    * ``slice`` is a ``[begin, end]`` index pair that excludes the comment
      opener (and the closing ``*/`` of a terminated block comment);
    * ``range`` is a ``[begin, end]`` index pair that starts at the opener;
    * ``loc`` is a ``SourceLocation`` with start and end ``Position``.
    """

    def __init__(self, multiLine=None, slice=None, range=None, loc=None):
        self.multiLine = multiLine
        self.slice = slice
        self.range = range
        self.loc = loc


class RawToken(Object):
    """Plain record storing every constructor argument as an attribute."""

    def __init__(self, type=None, value=None, pattern=None, flags=None,
                 regex=None, octal=None, cooked=None, head=None, tail=None,
                 lineNumber=None, lineStart=None, start=None, end=None):
        self.type = type
        self.value = value
        self.pattern = pattern
        self.flags = flags
        self.regex = regex
        self.octal = octal
        self.cooked = cooked
        self.head = head
        self.tail = tail
        self.lineNumber = lineNumber
        self.lineStart = lineStart
        self.start = start
        self.end = end


class ScannerState(Object):
    """Snapshot of a scanner's ``index``, ``lineNumber`` and ``lineStart``.

    Produced by ``Scanner.saveState`` and consumed by
    ``Scanner.restoreState``.
    """

    def __init__(self, index=None, lineNumber=None, lineStart=None):
        self.index = index
        self.lineNumber = lineNumber
        self.lineStart = lineStart


class Octal(object):
    """Plain record pairing an ``octal`` value with a ``code`` value."""

    def __init__(self, octal, code):
        self.octal = octal
        self.code = code


class Scanner(object):
    """Character-level scanner over a piece of source text."""

    def __init__(self, code, handler):
        """Prepare to scan ``code``.

        ``handler`` is stored as ``errorHandler``; its ``throwError`` and
        ``tolerateError`` methods are used to report problems.
        """
        text = unicode(code)
        # A trailing NUL sentinel lets the scanner look one character ahead
        # (``source[index + 1]``) without running past the end of the string.
        self.source = text + '\x00'
        self.errorHandler = handler
        self.trackComment = False
        self.isModule = False

        # Measure the converted text (not the raw argument) so that
        # ``length`` always agrees with ``source``; the sentinel is excluded.
        self.length = len(text)
        self.index = 0
        # Empty input is reported as line 0; anything else starts on line 1.
        self.lineNumber = 1 if self.length > 0 else 0
        self.lineStart = 0
        self.curlyStack = []

    def saveState(self):
        """Return a ``ScannerState`` snapshot of the current position."""
        return ScannerState(
            index=self.index,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart
        )

    def restoreState(self, state):
        """Reset the position fields from a ``ScannerState`` snapshot."""
        self.index = state.index
        self.lineNumber = state.lineNumber
        self.lineStart = state.lineStart

    def eof(self):
        """Return True once ``index`` has reached ``length``."""
        return self.index >= self.length

    def throwUnexpectedToken(self, message=Messages.UnexpectedTokenIllegal):
        """Report ``message`` at the current position via ``throwError``.

        The column passed to the handler is ``index - lineStart + 1``.
        Returns whatever the handler's ``throwError`` returns.
        """
        return self.errorHandler.throwError(
            self.index,
            self.lineNumber,
            self.index - self.lineStart + 1,
            message
        )

    def tolerateUnexpectedToken(self, message=Messages.UnexpectedTokenIllegal):
        """Report ``message`` at the current position via ``tolerateError``.

        The column passed to the handler is ``index - lineStart + 1``.
        """
        self.errorHandler.tolerateError(
            self.index,
            self.lineNumber,
            self.index - self.lineStart + 1,
            message
        )

    # https://tc39.github.io/ecma262/#sec-comments

    def skipSingleLineComment(self, offset):
        """Advance past a single-line comment whose opener was consumed.

        ``offset`` is the number of characters of the comment opener that
        precede the current index (callers in this class pass 2 or 3).

        Scanning stops after the first line terminator (a ``\\r\\n`` pair
        counts as one) or at end of input. Returns a list that contains one
        ``Comment`` when ``trackComment`` is set and is empty otherwise.
        """
        comments = []

        # ``start`` and ``loc`` are only needed, and only defined, when
        # comments are being collected.
        if self.trackComment:
            start = self.index - offset
            loc = SourceLocation(
                start=Position(
                    line=self.lineNumber,
                    column=self.index - self.lineStart - offset
                ),
                end=Position()
            )

        while not self.eof():
            ch = self.source[self.index]
            self.index += 1
            if Character.isLineTerminator(ch):
                if self.trackComment:
                    # ``index`` already points past the terminator, hence the
                    # ``- 1`` to exclude it from the comment.
                    loc.end = Position(
                        line=self.lineNumber,
                        column=self.index - self.lineStart - 1
                    )
                    entry = Comment(
                        multiLine=False,
                        slice=[start + offset, self.index - 1],
                        range=[start, self.index - 1],
                        loc=loc
                    )
                    comments.append(entry)

                # Treat '\r\n' as a single line terminator.
                if ch == '\r' and self.source[self.index] == '\n':
                    self.index += 1

                self.lineNumber += 1
                self.lineStart = self.index
                return comments

        # End of input reached without a line terminator.
        if self.trackComment:
            loc.end = Position(
                line=self.lineNumber,
                column=self.index - self.lineStart
            )
            entry = Comment(
                multiLine=False,
                slice=[start + offset, self.index],
                range=[start, self.index],
                loc=loc
            )
            comments.append(entry)

        return comments

    def skipMultiLineComment(self):
        """Advance past a block comment whose two-character opener was consumed.

        Line terminators inside the comment update ``lineNumber`` and
        ``lineStart``. If the input ends before ``*/`` is found, the problem
        is reported through ``tolerateUnexpectedToken``.

        Returns a list that contains one ``Comment`` when ``trackComment`` is
        set and is empty otherwise.
        """
        comments = []

        # ``start`` and ``loc`` are only needed, and only defined, when
        # comments are being collected.
        if self.trackComment:
            start = self.index - 2
            loc = SourceLocation(
                start=Position(
                    line=self.lineNumber,
                    column=self.index - self.lineStart - 2
                ),
                end=Position()
            )

        while not self.eof():
            ch = self.source[self.index]
            if Character.isLineTerminator(ch):
                # Treat '\r\n' as a single line terminator.
                if ch == '\r' and self.source[self.index + 1] == '\n':
                    self.index += 1

                self.lineNumber += 1
                self.index += 1
                self.lineStart = self.index
            elif ch == '*':
                # Block comment ends with '*/'.
                if self.source[self.index + 1] == '/':
                    self.index += 2
                    if self.trackComment:
                        loc.end = Position(
                            line=self.lineNumber,
                            column=self.index - self.lineStart
                        )
                        entry = Comment(
                            multiLine=True,
                            slice=[start + 2, self.index - 2],
                            range=[start, self.index],
                            loc=loc
                        )
                        comments.append(entry)

                    return comments

                self.index += 1
            else:
                self.index += 1

        # Ran off the end of the file - the whole thing is a comment
        if self.trackComment:
            loc.end = Position(
                line=self.lineNumber,
                column=self.index - self.lineStart
            )
            entry = Comment(
                multiLine=True,
                slice=[start + 2, self.index],
                range=[start, self.index],
                loc=loc
            )
            comments.append(entry)

        self.tolerateUnexpectedToken()
        return comments

    def scanComments(self):
        """Skip white space, line terminators and comments from ``index``.

        Comments returned by the skip helpers are accumulated in a local
        list when ``trackComment`` is set.
        """
        comments = []

        # ``start`` is True while only white space and comments have been
        # seen since the beginning of the input or of the current line; the
        # '-->' form is only recognised in that state.
        start = self.index == 0
        while not self.eof():
            ch = self.source[self.index]

            if Character.isWhiteSpace(ch):
                self.index += 1
            elif Character.isLineTerminator(ch):
                self.index += 1
                # Treat '\r\n' as a single line terminator.
                if ch == '\r' and self.source[self.index] == '\n':
                    self.index += 1

                self.lineNumber += 1
                self.lineStart = self.index
                start = True
            elif ch == '/':  # U+002F is '/'
                ch = self.source[self.index + 1]
                if ch == '/':
                    self.index += 2
                    comment = self.skipSingleLineComment(2)
                    if self.trackComment:
                        comments.extend(comment)

                    start = True
                elif ch == '*':  # U+002A is '*'
                    self.index += 2
                    comment = self.skipMultiLineComment()
                    if self.trackComment:
                        comments.extend(comment)
                else:
                    break

            elif start and ch == '-':  # U+002D is '-'
                # U+003E is '>'
                if self.source[self.index + 1:self.index + 3] == '->':
                    # '-->' is a single-line comment
                    self.index += 3
                    comment = self.skipSingleLineComment(3)
                    if self.trackComment:
                        comments.extend(comment)
                else:
                    break

            elif ch == '<' and not self.isModule:  # U+003C is '<'
                if self.source[self.index + 1:self.index + 4] == '!--':
                    # NOTE(review): the text available for this review stops
                    # at the next statement, so the rest of this branch and
                    # of the method is reproduced as found - confirm against
                    # the complete file.
                    self.index += 4  # `