diff --git a/nac3ast/Cargo.toml b/nac3ast/Cargo.toml new file mode 100644 index 00000000..24aa0e12 --- /dev/null +++ b/nac3ast/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "rustpython-ast" +version = "0.1.0" +authors = ["RustPython Team"] +edition = "2018" + +[features] +default = ["constant-optimization", "fold"] +constant-optimization = ["fold"] +fold = [] + +[dependencies] +num-bigint = "0.4.0" diff --git a/nac3ast/Python.asdl b/nac3ast/Python.asdl new file mode 100644 index 00000000..b3abe162 --- /dev/null +++ b/nac3ast/Python.asdl @@ -0,0 +1,125 @@ +-- ASDL's 4 builtin types are: +-- identifier, int, string, constant + +module Python +{ + mod = Module(stmt* body, type_ignore* type_ignores) + | Interactive(stmt* body) + | Expression(expr body) + | FunctionType(expr* argtypes, expr returns) + + stmt = FunctionDef(identifier name, arguments args, + stmt* body, expr* decorator_list, expr? returns, + string? type_comment) + | AsyncFunctionDef(identifier name, arguments args, + stmt* body, expr* decorator_list, expr? returns, + string? type_comment) + + | ClassDef(identifier name, + expr* bases, + keyword* keywords, + stmt* body, + expr* decorator_list) + | Return(expr? value) + + | Delete(expr* targets) + | Assign(expr* targets, expr value, string? type_comment) + | AugAssign(expr target, operator op, expr value) + -- 'simple' indicates that we annotate simple name without parens + | AnnAssign(expr target, expr annotation, expr? value, bool simple) + + -- use 'orelse' because else is a keyword in target languages + | For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) + | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) + | While(expr test, stmt* body, stmt* orelse) + | If(expr test, stmt* body, stmt* orelse) + | With(withitem* items, stmt* body, string? type_comment) + | AsyncWith(withitem* items, stmt* body, string? type_comment) + + | Raise(expr? exc, expr? 
cause) + | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) + | Assert(expr test, expr? msg) + + | Import(alias* names) + | ImportFrom(identifier? module, alias* names, int level) + + | Global(identifier* names) + | Nonlocal(identifier* names) + | Expr(expr value) + | Pass | Break | Continue + + -- col_offset is the byte offset in the utf8 string the parser uses + attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) + + -- BoolOp() can use left & right? + expr = BoolOp(boolop op, expr* values) + | NamedExpr(expr target, expr value) + | BinOp(expr left, operator op, expr right) + | UnaryOp(unaryop op, expr operand) + | Lambda(arguments args, expr body) + | IfExp(expr test, expr body, expr orelse) + | Dict(expr?* keys, expr* values) + | Set(expr* elts) + | ListComp(expr elt, comprehension* generators) + | SetComp(expr elt, comprehension* generators) + | DictComp(expr key, expr value, comprehension* generators) + | GeneratorExp(expr elt, comprehension* generators) + -- the grammar constrains where yield expressions can occur + | Await(expr value) + | Yield(expr? value) + | YieldFrom(expr value) + -- need sequences for compare to distinguish between + -- x < 4 < 3 and (x < 4) < 3 + | Compare(expr left, cmpop* ops, expr* comparators) + | Call(expr func, expr* args, keyword* keywords) + | FormattedValue(expr value, conversion_flag? conversion, expr? format_spec) + | JoinedStr(expr* values) + | Constant(constant value, string? kind) + + -- the following expression can appear in assignment context + | Attribute(expr value, identifier attr, expr_context ctx) + | Subscript(expr value, expr slice, expr_context ctx) + | Starred(expr value, expr_context ctx) + | Name(identifier id, expr_context ctx) + | List(expr* elts, expr_context ctx) + | Tuple(expr* elts, expr_context ctx) + + -- can appear only in Subscript + | Slice(expr? lower, expr? upper, expr? 
step) + + -- col_offset is the byte offset in the utf8 string the parser uses + attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) + + expr_context = Load | Store | Del + + boolop = And | Or + + operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift + | RShift | BitOr | BitXor | BitAnd | FloorDiv + + unaryop = Invert | Not | UAdd | USub + + cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn + + comprehension = (expr target, expr iter, expr* ifs, bool is_async) + + excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) + attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) + + arguments = (arg* posonlyargs, arg* args, arg? vararg, arg* kwonlyargs, + expr?* kw_defaults, arg? kwarg, expr* defaults) + + arg = (identifier arg, expr? annotation, string? type_comment) + attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) + + -- keyword arguments supplied to call (NULL identifier for **kwargs) + keyword = (identifier? arg, expr value) + attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) + + -- import name with optional 'as' alias. + alias = (identifier name, identifier? asname) + + withitem = (expr context_expr, expr? optional_vars) + + type_ignore = TypeIgnore(int lineno, string tag) +} diff --git a/nac3ast/asdl.py b/nac3ast/asdl.py new file mode 100644 index 00000000..74f84c89 --- /dev/null +++ b/nac3ast/asdl.py @@ -0,0 +1,385 @@ +#------------------------------------------------------------------------------- +# Parser for ASDL [1] definition files. Reads in an ASDL description and parses +# it into an AST that describes it. +# +# The EBNF we're parsing here: Figure 1 of the paper [1]. Extended to support +# modules and attributes after a product. Words starting with Capital letters +# are terminals. Literal tokens are in "double quotes". Others are +# non-terminals. Id is either TypeId or ConstructorId. 
+# +# module ::= "module" Id "{" [definitions] "}" +# definitions ::= { TypeId "=" type } +# type ::= product | sum +# product ::= fields ["attributes" fields] +# fields ::= "(" { field, "," } field ")" +# field ::= TypeId ["?"] ["*"] [Id] +# sum ::= constructor { "|" constructor } ["attributes" fields] +# constructor ::= ConstructorId [fields] +# +# [1] "The Zephyr Abstract Syntax Description Language" by Wang et al. See +# http://asdl.sourceforge.net/ +#------------------------------------------------------------------------------- +from collections import namedtuple +import re + +__all__ = [ + 'builtin_types', 'parse', 'AST', 'Module', 'Type', 'Constructor', + 'Field', 'Sum', 'Product', 'VisitorBase', 'Check', 'check'] + +# The following classes define nodes into which the ASDL description is parsed. +# Note: this is a "meta-AST". ASDL files (such as Python.asdl) describe the AST +# structure used by a programming language. But ASDL files themselves need to be +# parsed. This module parses ASDL files and uses a simple AST to represent them. +# See the EBNF at the top of the file to understand the logical connection +# between the various node types. 
# Type names that ASDL definitions may reference without defining them.
builtin_types = {'identifier', 'string', 'int', 'constant', 'bool', 'conversion_flag'}


class AST:
    """Base class of all meta-AST nodes.

    Subclasses are expected to override ``__repr__``; the base
    implementation deliberately raises ``NotImplementedError``.
    """

    def __repr__(self):
        raise NotImplementedError


class Module(AST):
    """Root node of a parsed ASDL description.

    Attributes set by the constructor:
        name:  the module name.
        dfns:  the definitions, each exposing ``.name`` and ``.value``
               (i.e. ``Type`` nodes).
        types: dict mapping every definition's name to its value.
    """

    def __init__(self, name, dfns):
        self.name = name
        self.dfns = dfns
        # Name -> definition body lookup table built once from the definitions.
        self.types = {dfn.name: dfn.value for dfn in dfns}

    def __repr__(self):
        return f'Module({self.name}, {self.dfns})'


class Type(AST):
    """A single ``name = value`` definition inside a module."""

    def __init__(self, name, value):
        self.name = name
        self.value = value

    def __repr__(self):
        return f'Type({self.name}, {self.value})'


class Constructor(AST):
    """One alternative of a sum type, with an optional list of fields.

    ``fields`` is normalised to an empty list when omitted or falsy.
    """

    def __init__(self, name, fields=None):
        self.name = name
        self.fields = fields or []

    def __repr__(self):
        return f'Constructor({self.name}, {self.fields})'


class Field(AST):
    """A typed field of a constructor or product.

    Args:
        type: name of the field's type.
        name: field identifier, or ``None`` for an unnamed field.
        seq:  True when the field carries the ``*`` (sequence) qualifier.
        opt:  True when the field carries the ``?`` (optional) qualifier.

    Both qualifiers may be set at once (written ``type?* name`` in ASDL);
    ``str()`` and ``repr()`` report each of them independently.
    """

    def __init__(self, type, name=None, seq=False, opt=False):
        self.type = type
        self.name = name
        self.seq = seq
        self.opt = opt

    def __str__(self):
        # "?" precedes "*", matching the order used in the ASDL source text.
        qualifiers = ("?" if self.opt else "") + ("*" if self.seq else "")
        if self.name is None:
            # Unnamed field: do not render the literal text "None".
            return f"{self.type}{qualifiers}"
        return f"{self.type}{qualifiers} {self.name}"

    def __repr__(self):
        # Keyword arguments appear in constructor-signature order (seq, opt).
        extra = ""
        if self.seq:
            extra += ", seq=True"
        if self.opt:
            extra += ", opt=True"
        if self.name is None:
            return f'Field({self.type}{extra})'
        return f'Field({self.type}, {self.name}{extra})'


class Sum(AST):
    """A sum type: a list of constructors plus optional shared attributes.

    ``attributes`` is normalised to an empty list when omitted or falsy.
    """

    def __init__(self, types, attributes=None):
        self.types = types
        self.attributes = attributes or []

    def __repr__(self):
        if self.attributes:
            return f'Sum({self.types}, {self.attributes})'
        return f'Sum({self.types})'


class Product(AST):
    """A product type: a list of fields plus optional attributes.

    ``attributes`` is normalised to an empty list when omitted or falsy.
    """

    def __init__(self, fields, attributes=None):
        self.fields = fields
        self.attributes = attributes or []

    def __repr__(self):
        if self.attributes:
            return f'Product({self.fields}, {self.attributes})'
        return f'Product({self.fields})'

# A generic visitor for the meta-AST that describes ASDL. This can be used by
# emitters.
Note that this visitor does not provide a generic visit method, so a +# subclass needs to define visit methods from visitModule to as deep as the +# interesting node. +# We also define a Check visitor that makes sure the parsed ASDL is well-formed. + +class VisitorBase(object): + """Generic tree visitor for ASTs.""" + def __init__(self): + self.cache = {} + + def visit(self, obj, *args): + klass = obj.__class__ + meth = self.cache.get(klass) + if meth is None: + methname = "visit" + klass.__name__ + meth = getattr(self, methname, None) + self.cache[klass] = meth + if meth: + try: + meth(obj, *args) + except Exception as e: + print("Error visiting %r: %s" % (obj, e)) + raise + +class Check(VisitorBase): + """A visitor that checks a parsed ASDL tree for correctness. + + Errors are printed and accumulated. + """ + def __init__(self): + super(Check, self).__init__() + self.cons = {} + self.errors = 0 + self.types = {} + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type): + self.visit(type.value, str(type.name)) + + def visitSum(self, sum, name): + for t in sum.types: + self.visit(t, name) + + def visitConstructor(self, cons, name): + key = str(cons.name) + conflict = self.cons.get(key) + if conflict is None: + self.cons[key] = name + else: + print('Redefinition of constructor {}'.format(key)) + print('Defined in {} and {}'.format(conflict, name)) + self.errors += 1 + for f in cons.fields: + self.visit(f, key) + + def visitField(self, field, name): + key = str(field.type) + l = self.types.setdefault(key, []) + l.append(name) + + def visitProduct(self, prod, name): + for f in prod.fields: + self.visit(f, name) + +def check(mod): + """Check the parsed ASDL tree for correctness. + + Return True if success. For failure, the errors are printed out and False + is returned. 
+ """ + v = Check() + v.visit(mod) + + for t in v.types: + if t not in mod.types and not t in builtin_types: + v.errors += 1 + uses = ", ".join(v.types[t]) + print('Undefined type {}, used in {}'.format(t, uses)) + return not v.errors + +# The ASDL parser itself comes next. The only interesting external interface +# here is the top-level parse function. + +def parse(filename): + """Parse ASDL from the given file and return a Module node describing it.""" + with open(filename) as f: + parser = ASDLParser() + return parser.parse(f.read()) + +# Types for describing tokens in an ASDL specification. +class TokenKind: + """TokenKind is provides a scope for enumerated token kinds.""" + (ConstructorId, TypeId, Equals, Comma, Question, Pipe, Asterisk, + LParen, RParen, LBrace, RBrace) = range(11) + + operator_table = { + '=': Equals, ',': Comma, '?': Question, '|': Pipe, '(': LParen, + ')': RParen, '*': Asterisk, '{': LBrace, '}': RBrace} + +Token = namedtuple('Token', 'kind value lineno') + +class ASDLSyntaxError(Exception): + def __init__(self, msg, lineno=None): + self.msg = msg + self.lineno = lineno or '' + + def __str__(self): + return 'Syntax error on line {0.lineno}: {0.msg}'.format(self) + +def tokenize_asdl(buf): + """Tokenize the given buffer. Yield Token objects.""" + for lineno, line in enumerate(buf.splitlines(), 1): + for m in re.finditer(r'\s*(\w+|--.*|.)', line.strip()): + c = m.group(1) + if c[0].isalpha(): + # Some kind of identifier + if c[0].isupper(): + yield Token(TokenKind.ConstructorId, c, lineno) + else: + yield Token(TokenKind.TypeId, c, lineno) + elif c[:2] == '--': + # Comment + break + else: + # Operators + try: + op_kind = TokenKind.operator_table[c] + except KeyError: + raise ASDLSyntaxError('Invalid operator %s' % c, lineno) + yield Token(op_kind, c, lineno) + +class ASDLParser: + """Parser for ASDL files. + + Create, then call the parse method on a buffer containing ASDL. 
+ This is a simple recursive descent parser that uses tokenize_asdl for the + lexing. + """ + def __init__(self): + self._tokenizer = None + self.cur_token = None + + def parse(self, buf): + """Parse the ASDL in the buffer and return an AST with a Module root. + """ + self._tokenizer = tokenize_asdl(buf) + self._advance() + return self._parse_module() + + def _parse_module(self): + if self._at_keyword('module'): + self._advance() + else: + raise ASDLSyntaxError( + 'Expected "module" (found {})'.format(self.cur_token.value), + self.cur_token.lineno) + name = self._match(self._id_kinds) + self._match(TokenKind.LBrace) + defs = self._parse_definitions() + self._match(TokenKind.RBrace) + return Module(name, defs) + + def _parse_definitions(self): + defs = [] + while self.cur_token.kind == TokenKind.TypeId: + typename = self._advance() + self._match(TokenKind.Equals) + type = self._parse_type() + defs.append(Type(typename, type)) + return defs + + def _parse_type(self): + if self.cur_token.kind == TokenKind.LParen: + # If we see a (, it's a product + return self._parse_product() + else: + # Otherwise it's a sum. 
Look for ConstructorId + sumlist = [Constructor(self._match(TokenKind.ConstructorId), + self._parse_optional_fields())] + while self.cur_token.kind == TokenKind.Pipe: + # More constructors + self._advance() + sumlist.append(Constructor( + self._match(TokenKind.ConstructorId), + self._parse_optional_fields())) + return Sum(sumlist, self._parse_optional_attributes()) + + def _parse_product(self): + return Product(self._parse_fields(), self._parse_optional_attributes()) + + def _parse_fields(self): + fields = [] + self._match(TokenKind.LParen) + while self.cur_token.kind == TokenKind.TypeId: + typename = self._advance() + is_seq, is_opt = self._parse_optional_field_quantifier() + id = (self._advance() if self.cur_token.kind in self._id_kinds + else None) + fields.append(Field(typename, id, seq=is_seq, opt=is_opt)) + if self.cur_token.kind == TokenKind.RParen: + break + elif self.cur_token.kind == TokenKind.Comma: + self._advance() + self._match(TokenKind.RParen) + return fields + + def _parse_optional_fields(self): + if self.cur_token.kind == TokenKind.LParen: + return self._parse_fields() + else: + return None + + def _parse_optional_attributes(self): + if self._at_keyword('attributes'): + self._advance() + return self._parse_fields() + else: + return None + + def _parse_optional_field_quantifier(self): + is_seq, is_opt = False, False + if self.cur_token.kind == TokenKind.Question: + is_opt = True + self._advance() + if self.cur_token.kind == TokenKind.Asterisk: + is_seq = True + self._advance() + return is_seq, is_opt + + def _advance(self): + """ Return the value of the current token and read the next one into + self.cur_token. + """ + cur_val = None if self.cur_token is None else self.cur_token.value + try: + self.cur_token = next(self._tokenizer) + except StopIteration: + self.cur_token = None + return cur_val + + _id_kinds = (TokenKind.ConstructorId, TokenKind.TypeId) + + def _match(self, kind): + """The 'match' primitive of RD parsers. 
+ + * Verifies that the current token is of the given kind (kind can + be a tuple, in which the kind must match one of its members). + * Returns the value of the current token + * Reads in the next token + """ + if (isinstance(kind, tuple) and self.cur_token.kind in kind or + self.cur_token.kind == kind + ): + value = self.cur_token.value + self._advance() + return value + else: + raise ASDLSyntaxError( + 'Unmatched {} (found {})'.format(kind, self.cur_token.kind), + self.cur_token.lineno) + + def _at_keyword(self, keyword): + return (self.cur_token.kind == TokenKind.TypeId and + self.cur_token.value == keyword) diff --git a/nac3ast/asdl_rs.py b/nac3ast/asdl_rs.py new file mode 100755 index 00000000..3c5d6fd5 --- /dev/null +++ b/nac3ast/asdl_rs.py @@ -0,0 +1,609 @@ +#! /usr/bin/env python +"""Generate Rust code from an ASDL description.""" + +import os +import sys +import textwrap + +import json + +from argparse import ArgumentParser +from pathlib import Path + +import asdl + +TABSIZE = 4 +AUTOGEN_MESSAGE = "// File automatically generated by {}.\n\n" + +builtin_type_mapping = { + 'identifier': 'Ident', + 'string': 'String', + 'int': 'usize', + 'constant': 'Constant', + 'bool': 'bool', + 'conversion_flag': 'ConversionFlag', +} +assert builtin_type_mapping.keys() == asdl.builtin_types + +def get_rust_type(name): + """Return a string for the C name of the type. + + This function special cases the default types provided by asdl. + """ + if name in asdl.builtin_types: + return builtin_type_mapping[name] + else: + return "".join(part.capitalize() for part in name.split("_")) + +def is_simple(sum): + """Return True if a sum is a simple. + + A sum is simple if its types have no fields, e.g. 
+ unaryop = Invert | Not | UAdd | USub + """ + for t in sum.types: + if t.fields: + return False + return True + +def asdl_of(name, obj): + if isinstance(obj, asdl.Product) or isinstance(obj, asdl.Constructor): + fields = ", ".join(map(str, obj.fields)) + if fields: + fields = "({})".format(fields) + return "{}{}".format(name, fields) + else: + if is_simple(obj): + types = " | ".join(type.name for type in obj.types) + else: + sep = "\n{}| ".format(" " * (len(name) + 1)) + types = sep.join( + asdl_of(type.name, type) for type in obj.types + ) + return "{} = {}".format(name, types) + +class EmitVisitor(asdl.VisitorBase): + """Visit that emits lines""" + + def __init__(self, file): + self.file = file + self.identifiers = set() + super(EmitVisitor, self).__init__() + + def emit_identifier(self, name): + name = str(name) + if name in self.identifiers: + return + self.emit("_Py_IDENTIFIER(%s);" % name, 0) + self.identifiers.add(name) + + def emit(self, line, depth): + if line: + line = (" " * TABSIZE * depth) + line + self.file.write(line + "\n") + +class TypeInfo: + def __init__(self, name): + self.name = name + self.has_userdata = None + self.children = set() + self.boxed = False + + def __repr__(self): + return f"" + + def determine_userdata(self, typeinfo, stack): + if self.name in stack: + return None + stack.add(self.name) + for child, child_seq in self.children: + if child in asdl.builtin_types: + continue + childinfo = typeinfo[child] + child_has_userdata = childinfo.determine_userdata(typeinfo, stack) + if self.has_userdata is None and child_has_userdata is True: + self.has_userdata = True + + stack.remove(self.name) + return self.has_userdata + +class FindUserdataTypesVisitor(asdl.VisitorBase): + def __init__(self, typeinfo): + self.typeinfo = typeinfo + super().__init__() + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + stack = set() + for info in self.typeinfo.values(): + info.determine_userdata(self.typeinfo, stack) + + def 
visitType(self, type): + self.typeinfo[type.name] = TypeInfo(type.name) + self.visit(type.value, type.name) + + def visitSum(self, sum, name): + info = self.typeinfo[name] + if is_simple(sum): + info.has_userdata = False + else: + if len(sum.types) > 1: + info.boxed = True + if sum.attributes: + # attributes means Located, which has the `custom: U` field + info.has_userdata = True + for variant in sum.types: + self.add_children(name, variant.fields) + + def visitProduct(self, product, name): + info = self.typeinfo[name] + if product.attributes: + # attributes means Located, which has the `custom: U` field + info.has_userdata = True + if len(product.fields) > 2: + info.boxed = True + self.add_children(name, product.fields) + + def add_children(self, name, fields): + self.typeinfo[name].children.update((field.type, field.seq) for field in fields) + +def rust_field(field_name): + if field_name == 'type': + return 'type_' + else: + return field_name + +class TypeInfoEmitVisitor(EmitVisitor): + def __init__(self, file, typeinfo): + self.typeinfo = typeinfo + super().__init__(file) + + def has_userdata(self, typ): + return self.typeinfo[typ].has_userdata + + def get_generics(self, typ, *generics): + if self.has_userdata(typ): + return [f"<{g}>" for g in generics] + else: + return ["" for g in generics] + +class StructVisitor(TypeInfoEmitVisitor): + """Visitor to generate typedefs for AST.""" + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type, depth=0): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + if is_simple(sum): + self.simple_sum(sum, name, depth) + else: + self.sum_with_constructors(sum, name, depth) + + def emit_attrs(self, depth): + self.emit("#[derive(Debug, PartialEq)]", depth) + + def simple_sum(self, sum, name, depth): + rustname = get_rust_type(name) + self.emit_attrs(depth) + self.emit(f"pub enum {rustname} {{", depth) + for variant in sum.types: + 
self.emit(f"{variant.name},", depth + 1) + self.emit("}", depth) + self.emit("", depth) + + def sum_with_constructors(self, sum, name, depth): + typeinfo = self.typeinfo[name] + generics, generics_applied = self.get_generics(name, "U = ()", "U") + enumname = rustname = get_rust_type(name) + # all the attributes right now are for location, so if it has attrs we + # can just wrap it in Located<> + if sum.attributes: + enumname = rustname + "Kind" + self.emit_attrs(depth) + self.emit(f"pub enum {enumname}{generics} {{", depth) + for t in sum.types: + self.visit(t, typeinfo, depth + 1) + self.emit("}", depth) + if sum.attributes: + self.emit(f"pub type {rustname} = Located<{enumname}{generics_applied}, U>;", depth) + self.emit("", depth) + + def visitConstructor(self, cons, parent, depth): + if cons.fields: + self.emit(f"{cons.name} {{", depth) + for f in cons.fields: + self.visit(f, parent, "", depth + 1) + self.emit("},", depth) + else: + self.emit(f"{cons.name},", depth) + + def visitField(self, field, parent, vis, depth): + typ = get_rust_type(field.type) + fieldtype = self.typeinfo.get(field.type) + if fieldtype and fieldtype.has_userdata: + typ = f"{typ}" + # don't box if we're doing Vec, but do box if we're doing Vec>> + if fieldtype and fieldtype.boxed and (not field.seq or field.opt): + typ = f"Box<{typ}>" + if field.opt: + typ = f"Option<{typ}>" + if field.seq: + typ = f"Vec<{typ}>" + name = rust_field(field.name) + self.emit(f"{vis}{name}: {typ},", depth) + + def visitProduct(self, product, name, depth): + typeinfo = self.typeinfo[name] + generics, generics_applied = self.get_generics(name, "U = ()", "U") + dataname = rustname = get_rust_type(name) + if product.attributes: + dataname = rustname + "Data" + self.emit_attrs(depth) + self.emit(f"pub struct {dataname}{generics} {{", depth) + for f in product.fields: + self.visit(f, typeinfo, "pub ", depth + 1) + self.emit("}", depth) + if product.attributes: + # attributes should just be location info + 
self.emit(f"pub type {rustname} = Located<{dataname}{generics_applied}, U>;", depth); + self.emit("", depth) + + +class FoldTraitDefVisitor(TypeInfoEmitVisitor): + def visitModule(self, mod, depth): + self.emit("pub trait Fold {", depth) + self.emit("type TargetU;", depth + 1) + self.emit("type Error;", depth + 1) + self.emit("fn map_user(&mut self, user: U) -> Result;", depth + 2) + for dfn in mod.dfns: + self.visit(dfn, depth + 2) + self.emit("}", depth) + + def visitType(self, type, depth): + name = type.name + apply_u, apply_target_u = self.get_generics(name, "U", "Self::TargetU") + enumname = get_rust_type(name) + self.emit(f"fn fold_{name}(&mut self, node: {enumname}{apply_u}) -> Result<{enumname}{apply_target_u}, Self::Error> {{", depth) + self.emit(f"fold_{name}(self, node)", depth + 1) + self.emit("}", depth) + + +class FoldImplVisitor(TypeInfoEmitVisitor): + def visitModule(self, mod, depth): + self.emit("fn fold_located + ?Sized, T, MT>(folder: &mut F, node: Located, f: impl FnOnce(&mut F, T) -> Result) -> Result, F::Error> {", depth) + self.emit("Ok(Located { custom: folder.map_user(node.custom)?, location: node.location, node: f(folder, node.node)? 
})", depth + 1) + self.emit("}", depth) + for dfn in mod.dfns: + self.visit(dfn, depth) + + def visitType(self, type, depth=0): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + apply_t, apply_u, apply_target_u = self.get_generics(name, "T", "U", "F::TargetU") + enumname = get_rust_type(name) + is_located = bool(sum.attributes) + + self.emit(f"impl Foldable for {enumname}{apply_t} {{", depth) + self.emit(f"type Mapped = {enumname}{apply_u};", depth + 1) + self.emit("fn fold + ?Sized>(self, folder: &mut F) -> Result {", depth + 1) + self.emit(f"folder.fold_{name}(self)", depth + 2) + self.emit("}", depth + 1) + self.emit("}", depth) + + self.emit(f"pub fn fold_{name} + ?Sized>(#[allow(unused)] folder: &mut F, node: {enumname}{apply_u}) -> Result<{enumname}{apply_target_u}, F::Error> {{", depth) + if is_located: + self.emit("fold_located(folder, node, |folder, node| {", depth) + enumname += "Kind" + self.emit("match node {", depth + 1) + for cons in sum.types: + fields_pattern = self.make_pattern(cons.fields) + self.emit(f"{enumname}::{cons.name} {{ {fields_pattern} }} => {{", depth + 2) + self.gen_construction(f"{enumname}::{cons.name}", cons.fields, depth + 3) + self.emit("}", depth + 2) + self.emit("}", depth + 1) + if is_located: + self.emit("})", depth) + self.emit("}", depth) + + + def visitProduct(self, product, name, depth): + apply_t, apply_u, apply_target_u = self.get_generics(name, "T", "U", "F::TargetU") + structname = get_rust_type(name) + is_located = bool(product.attributes) + + self.emit(f"impl Foldable for {structname}{apply_t} {{", depth) + self.emit(f"type Mapped = {structname}{apply_u};", depth + 1) + self.emit("fn fold + ?Sized>(self, folder: &mut F) -> Result {", depth + 1) + self.emit(f"folder.fold_{name}(self)", depth + 2) + self.emit("}", depth + 1) + self.emit("}", depth) + + self.emit(f"pub fn fold_{name} + ?Sized>(#[allow(unused)] folder: &mut F, node: {structname}{apply_u}) -> 
Result<{structname}{apply_target_u}, F::Error> {{", depth) + if is_located: + self.emit("fold_located(folder, node, |folder, node| {", depth) + structname += "Data" + fields_pattern = self.make_pattern(product.fields) + self.emit(f"let {structname} {{ {fields_pattern} }} = node;", depth + 1) + self.gen_construction(structname, product.fields, depth + 1) + if is_located: + self.emit("})", depth) + self.emit("}", depth) + + def make_pattern(self, fields): + return ",".join(rust_field(f.name) for f in fields) + + def gen_construction(self, cons_path, fields, depth): + self.emit(f"Ok({cons_path} {{", depth) + for field in fields: + name = rust_field(field.name) + self.emit(f"{name}: Foldable::fold({name}, folder)?,", depth + 1) + self.emit("})", depth) + + +class FoldModuleVisitor(TypeInfoEmitVisitor): + def visitModule(self, mod): + depth = 0 + self.emit('#[cfg(feature = "fold")]', depth) + self.emit("pub mod fold {", depth) + self.emit("use super::*;", depth + 1) + self.emit("use crate::fold_helpers::Foldable;", depth + 1) + FoldTraitDefVisitor(self.file, self.typeinfo).visit(mod, depth + 1) + FoldImplVisitor(self.file, self.typeinfo).visit(mod, depth + 1) + self.emit("}", depth) + + +class ClassDefVisitor(EmitVisitor): + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type, depth=0): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + for cons in sum.types: + self.visit(cons, sum.attributes, depth) + + def visitConstructor(self, cons, attrs, depth): + self.gen_classdef(cons.name, cons.fields, attrs, depth) + + def visitProduct(self, product, name, depth): + self.gen_classdef(name, product.fields, product.attributes, depth) + + def gen_classdef(self, name, fields, attrs, depth): + structname = "Node" + name + self.emit(f'#[pyclass(module = "_ast", name = {json.dumps(name)}, base = "AstNode")]', depth) + self.emit(f"struct {structname};", depth) + self.emit("#[pyimpl(flags(HAS_DICT, 
BASETYPE))]", depth) + self.emit(f"impl {structname} {{", depth) + self.emit(f"#[extend_class]", depth + 1) + self.emit("fn extend_class_with_fields(ctx: &PyContext, class: &PyTypeRef) {", depth + 1) + fields = ",".join(f"ctx.new_str({json.dumps(f.name)})" for f in fields) + self.emit(f'class.set_str_attr("_fields", ctx.new_list(vec![{fields}]));', depth + 2) + attrs = ",".join(f"ctx.new_str({json.dumps(attr.name)})" for attr in attrs) + self.emit(f'class.set_str_attr("_attributes", ctx.new_list(vec![{attrs}]));', depth + 2) + self.emit("}", depth + 1) + self.emit("}", depth) + +class ExtendModuleVisitor(EmitVisitor): + + def visitModule(self, mod): + depth = 0 + self.emit("pub fn extend_module_nodes(vm: &VirtualMachine, module: &PyObjectRef) {", depth) + self.emit("extend_module!(vm, module, {", depth + 1) + for dfn in mod.dfns: + self.visit(dfn, depth + 2) + self.emit("})", depth + 1) + self.emit("}", depth) + + def visitType(self, type, depth): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + for cons in sum.types: + self.visit(cons, depth) + + def visitConstructor(self, cons, depth): + self.gen_extension(cons.name, depth) + + def visitProduct(self, product, name, depth): + self.gen_extension(name, depth) + + def gen_extension(self, name, depth): + self.emit(f"{json.dumps(name)} => Node{name}::make_class(&vm.ctx),", depth) + + +class TraitImplVisitor(EmitVisitor): + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type, depth=0): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + enumname = get_rust_type(name) + if sum.attributes: + enumname += "Kind" + + + self.emit(f"impl NamedNode for ast::{enumname} {{", depth) + self.emit(f"const NAME: &'static str = {json.dumps(name)};", depth + 1) + self.emit("}", depth) + self.emit(f"impl Node for ast::{enumname} {{", depth) + self.emit("fn ast_to_object(self, _vm: &VirtualMachine) -> PyObjectRef {", 
depth + 1) + self.emit("match self {", depth + 2) + for variant in sum.types: + self.constructor_to_object(variant, enumname, depth + 3) + self.emit("}", depth + 2) + self.emit("}", depth + 1) + self.emit("fn ast_from_object(_vm: &VirtualMachine, _object: PyObjectRef) -> PyResult {", depth + 1) + self.gen_sum_fromobj(sum, name, enumname, depth + 2) + self.emit("}", depth + 1) + self.emit("}", depth) + + def constructor_to_object(self, cons, enumname, depth): + fields_pattern = self.make_pattern(cons.fields) + self.emit(f"ast::{enumname}::{cons.name} {{ {fields_pattern} }} => {{", depth) + self.make_node(cons.name, cons.fields, depth + 1) + self.emit("}", depth) + + def visitProduct(self, product, name, depth): + structname = get_rust_type(name) + if product.attributes: + structname += "Data" + + self.emit(f"impl NamedNode for ast::{structname} {{", depth) + self.emit(f"const NAME: &'static str = {json.dumps(name)};", depth + 1) + self.emit("}", depth) + self.emit(f"impl Node for ast::{structname} {{", depth) + self.emit("fn ast_to_object(self, _vm: &VirtualMachine) -> PyObjectRef {", depth + 1) + fields_pattern = self.make_pattern(product.fields) + self.emit(f"let ast::{structname} {{ {fields_pattern} }} = self;", depth + 2) + self.make_node(name, product.fields, depth + 2) + self.emit("}", depth + 1) + self.emit("fn ast_from_object(_vm: &VirtualMachine, _object: PyObjectRef) -> PyResult {", depth + 1) + self.gen_product_fromobj(product, name, structname, depth + 2) + self.emit("}", depth + 1) + self.emit("}", depth) + + def make_node(self, variant, fields, depth): + lines = [] + self.emit(f"let _node = AstNode.into_ref_with_type(_vm, Node{variant}::static_type().clone()).unwrap();", depth) + if fields: + self.emit("let _dict = _node.as_object().dict().unwrap();", depth) + for f in fields: + self.emit(f"_dict.set_item({json.dumps(f.name)}, {rust_field(f.name)}.ast_to_object(_vm), _vm).unwrap();", depth) + self.emit("_node.into_object()", depth) + + def 
make_pattern(self, fields): + return ",".join(rust_field(f.name) for f in fields) + + def gen_sum_fromobj(self, sum, sumname, enumname, depth): + if sum.attributes: + self.extract_location(sumname, depth) + + self.emit("let _cls = _object.class();", depth) + self.emit("Ok(", depth) + for cons in sum.types: + self.emit(f"if _cls.is(Node{cons.name}::static_type()) {{", depth) + self.gen_construction(f"{enumname}::{cons.name}", cons, sumname, depth + 1) + self.emit("} else", depth) + + self.emit("{", depth) + msg = f'format!("expected some sort of {sumname}, but got {{}}",_vm.to_repr(&_object)?)' + self.emit(f"return Err(_vm.new_type_error({msg}));", depth + 1) + self.emit("})", depth) + + def gen_product_fromobj(self, product, prodname, structname, depth): + if product.attributes: + self.extract_location(prodname, depth) + + self.emit("Ok(", depth) + self.gen_construction(structname, product, prodname, depth + 1) + self.emit(")", depth) + + def gen_construction(self, cons_path, cons, name, depth): + self.emit(f"ast::{cons_path} {{", depth) + for field in cons.fields: + self.emit(f"{rust_field(field.name)}: {self.decode_field(field, name)},", depth + 1) + self.emit("}", depth) + + def extract_location(self, typename, depth): + row = self.decode_field(asdl.Field('int', 'lineno'), typename) + column = self.decode_field(asdl.Field('int', 'col_offset'), typename) + self.emit(f"let _location = ast::Location::new({row}, {column});", depth) + + def wrap_located_node(self, depth): + self.emit(f"let node = ast::Located::new(_location, node);", depth) + + def decode_field(self, field, typename): + name = json.dumps(field.name) + if field.opt and not field.seq: + return f"get_node_field_opt(_vm, &_object, {name})?.map(|obj| Node::ast_from_object(_vm, obj)).transpose()?" + else: + return f"Node::ast_from_object(_vm, get_node_field(_vm, &_object, {name}, {json.dumps(typename)})?)?" 
+ +class ChainOfVisitors: + def __init__(self, *visitors): + self.visitors = visitors + + def visit(self, object): + for v in self.visitors: + v.visit(object) + v.emit("", 0) + + +def write_ast_def(mod, typeinfo, f): + f.write('pub use crate::location::Location;\n') + f.write('pub use crate::constant::*;\n') + f.write('\n') + f.write('type Ident = String;\n') + f.write('\n') + StructVisitor(f, typeinfo).emit_attrs(0) + f.write('pub struct Located {\n') + f.write(' pub location: Location,\n') + f.write(' pub custom: U,\n') + f.write(' pub node: T,\n') + f.write('}\n') + f.write('\n') + f.write('impl Located {\n') + f.write(' pub fn new(location: Location, node: T) -> Self {\n') + f.write(' Self { location, custom: (), node }\n') + f.write(' }\n') + f.write('}\n') + f.write('\n') + + c = ChainOfVisitors(StructVisitor(f, typeinfo), + FoldModuleVisitor(f, typeinfo)) + c.visit(mod) + + +def write_ast_mod(mod, f): + f.write('use super::*;\n') + f.write('\n') + + c = ChainOfVisitors(ClassDefVisitor(f), + TraitImplVisitor(f), + ExtendModuleVisitor(f)) + c.visit(mod) + +def main(input_filename, ast_mod_filename, ast_def_filename, dump_module=False): + auto_gen_msg = AUTOGEN_MESSAGE.format("/".join(Path(__file__).parts[-2:])) + mod = asdl.parse(input_filename) + if dump_module: + print('Parsed Module:') + print(mod) + if not asdl.check(mod): + sys.exit(1) + + typeinfo = {} + FindUserdataTypesVisitor(typeinfo).visit(mod) + + with ast_def_filename.open("w") as def_file, \ + ast_mod_filename.open("w") as mod_file: + def_file.write(auto_gen_msg) + write_ast_def(mod, typeinfo, def_file) + + mod_file.write(auto_gen_msg) + write_ast_mod(mod, mod_file) + + print(f"{ast_def_filename}, {ast_mod_filename} regenerated.") + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("input_file", type=Path) + parser.add_argument("-M", "--mod-file", type=Path, required=True) + parser.add_argument("-D", "--def-file", type=Path, required=True) + parser.add_argument("-d", 
"--dump-module", action="store_true") + + args = parser.parse_args() + main(args.input_file, args.mod_file, args.def_file, args.dump_module) diff --git a/nac3ast/src/ast_gen.rs b/nac3ast/src/ast_gen.rs new file mode 100644 index 00000000..31380e08 --- /dev/null +++ b/nac3ast/src/ast_gen.rs @@ -0,0 +1,1136 @@ +// File automatically generated by ast/asdl_rs.py. + +pub use crate::constant::*; +pub use crate::location::Location; + +type Ident = String; + +#[derive(Debug, PartialEq)] +pub struct Located { + pub location: Location, + pub custom: U, + pub node: T, +} + +impl Located { + pub fn new(location: Location, node: T) -> Self { + Self { + location, + custom: (), + node, + } + } +} + +#[derive(Debug, PartialEq)] +pub enum Mod { + Module { + body: Vec>, + type_ignores: Vec, + }, + Interactive { + body: Vec>, + }, + Expression { + body: Box>, + }, + FunctionType { + argtypes: Vec>, + returns: Box>, + }, +} + +#[derive(Debug, PartialEq)] +pub enum StmtKind { + FunctionDef { + name: Ident, + args: Box>, + body: Vec>, + decorator_list: Vec>, + returns: Option>>, + type_comment: Option, + }, + AsyncFunctionDef { + name: Ident, + args: Box>, + body: Vec>, + decorator_list: Vec>, + returns: Option>>, + type_comment: Option, + }, + ClassDef { + name: Ident, + bases: Vec>, + keywords: Vec>, + body: Vec>, + decorator_list: Vec>, + }, + Return { + value: Option>>, + }, + Delete { + targets: Vec>, + }, + Assign { + targets: Vec>, + value: Box>, + type_comment: Option, + }, + AugAssign { + target: Box>, + op: Operator, + value: Box>, + }, + AnnAssign { + target: Box>, + annotation: Box>, + value: Option>>, + simple: bool, + }, + For { + target: Box>, + iter: Box>, + body: Vec>, + orelse: Vec>, + type_comment: Option, + }, + AsyncFor { + target: Box>, + iter: Box>, + body: Vec>, + orelse: Vec>, + type_comment: Option, + }, + While { + test: Box>, + body: Vec>, + orelse: Vec>, + }, + If { + test: Box>, + body: Vec>, + orelse: Vec>, + }, + With { + items: Vec>, + body: Vec>, + 
type_comment: Option, + }, + AsyncWith { + items: Vec>, + body: Vec>, + type_comment: Option, + }, + Raise { + exc: Option>>, + cause: Option>>, + }, + Try { + body: Vec>, + handlers: Vec>, + orelse: Vec>, + finalbody: Vec>, + }, + Assert { + test: Box>, + msg: Option>>, + }, + Import { + names: Vec, + }, + ImportFrom { + module: Option, + names: Vec, + level: usize, + }, + Global { + names: Vec, + }, + Nonlocal { + names: Vec, + }, + Expr { + value: Box>, + }, + Pass, + Break, + Continue, +} +pub type Stmt = Located, U>; + +#[derive(Debug, PartialEq)] +pub enum ExprKind { + BoolOp { + op: Boolop, + values: Vec>, + }, + NamedExpr { + target: Box>, + value: Box>, + }, + BinOp { + left: Box>, + op: Operator, + right: Box>, + }, + UnaryOp { + op: Unaryop, + operand: Box>, + }, + Lambda { + args: Box>, + body: Box>, + }, + IfExp { + test: Box>, + body: Box>, + orelse: Box>, + }, + Dict { + keys: Vec>>>, + values: Vec>, + }, + Set { + elts: Vec>, + }, + ListComp { + elt: Box>, + generators: Vec>, + }, + SetComp { + elt: Box>, + generators: Vec>, + }, + DictComp { + key: Box>, + value: Box>, + generators: Vec>, + }, + GeneratorExp { + elt: Box>, + generators: Vec>, + }, + Await { + value: Box>, + }, + Yield { + value: Option>>, + }, + YieldFrom { + value: Box>, + }, + Compare { + left: Box>, + ops: Vec, + comparators: Vec>, + }, + Call { + func: Box>, + args: Vec>, + keywords: Vec>, + }, + FormattedValue { + value: Box>, + conversion: Option, + format_spec: Option>>, + }, + JoinedStr { + values: Vec>, + }, + Constant { + value: Constant, + kind: Option, + }, + Attribute { + value: Box>, + attr: Ident, + ctx: ExprContext, + }, + Subscript { + value: Box>, + slice: Box>, + ctx: ExprContext, + }, + Starred { + value: Box>, + ctx: ExprContext, + }, + Name { + id: Ident, + ctx: ExprContext, + }, + List { + elts: Vec>, + ctx: ExprContext, + }, + Tuple { + elts: Vec>, + ctx: ExprContext, + }, + Slice { + lower: Option>>, + upper: Option>>, + step: Option>>, + }, +} +pub type 
Expr = Located, U>; + +#[derive(Debug, PartialEq)] +pub enum ExprContext { + Load, + Store, + Del, +} + +#[derive(Debug, PartialEq)] +pub enum Boolop { + And, + Or, +} + +#[derive(Debug, PartialEq)] +pub enum Operator { + Add, + Sub, + Mult, + MatMult, + Div, + Mod, + Pow, + LShift, + RShift, + BitOr, + BitXor, + BitAnd, + FloorDiv, +} + +#[derive(Debug, PartialEq)] +pub enum Unaryop { + Invert, + Not, + UAdd, + USub, +} + +#[derive(Debug, PartialEq)] +pub enum Cmpop { + Eq, + NotEq, + Lt, + LtE, + Gt, + GtE, + Is, + IsNot, + In, + NotIn, +} + +#[derive(Debug, PartialEq)] +pub struct Comprehension { + pub target: Box>, + pub iter: Box>, + pub ifs: Vec>, + pub is_async: bool, +} + +#[derive(Debug, PartialEq)] +pub enum ExcepthandlerKind { + ExceptHandler { + type_: Option>>, + name: Option, + body: Vec>, + }, +} +pub type Excepthandler = Located, U>; + +#[derive(Debug, PartialEq)] +pub struct Arguments { + pub posonlyargs: Vec>, + pub args: Vec>, + pub vararg: Option>>, + pub kwonlyargs: Vec>, + pub kw_defaults: Vec>>>, + pub kwarg: Option>>, + pub defaults: Vec>, +} + +#[derive(Debug, PartialEq)] +pub struct ArgData { + pub arg: Ident, + pub annotation: Option>>, + pub type_comment: Option, +} +pub type Arg = Located, U>; + +#[derive(Debug, PartialEq)] +pub struct KeywordData { + pub arg: Option, + pub value: Box>, +} +pub type Keyword = Located, U>; + +#[derive(Debug, PartialEq)] +pub struct Alias { + pub name: Ident, + pub asname: Option, +} + +#[derive(Debug, PartialEq)] +pub struct Withitem { + pub context_expr: Box>, + pub optional_vars: Option>>, +} + +#[derive(Debug, PartialEq)] +pub enum TypeIgnore { + TypeIgnore { lineno: usize, tag: String }, +} + +#[cfg(feature = "fold")] +pub mod fold { + use super::*; + use crate::fold_helpers::Foldable; + pub trait Fold { + type TargetU; + type Error; + fn map_user(&mut self, user: U) -> Result; + fn fold_mod(&mut self, node: Mod) -> Result, Self::Error> { + fold_mod(self, node) + } + fn fold_stmt(&mut self, node: 
Stmt) -> Result, Self::Error> { + fold_stmt(self, node) + } + fn fold_expr(&mut self, node: Expr) -> Result, Self::Error> { + fold_expr(self, node) + } + fn fold_expr_context(&mut self, node: ExprContext) -> Result { + fold_expr_context(self, node) + } + fn fold_boolop(&mut self, node: Boolop) -> Result { + fold_boolop(self, node) + } + fn fold_operator(&mut self, node: Operator) -> Result { + fold_operator(self, node) + } + fn fold_unaryop(&mut self, node: Unaryop) -> Result { + fold_unaryop(self, node) + } + fn fold_cmpop(&mut self, node: Cmpop) -> Result { + fold_cmpop(self, node) + } + fn fold_comprehension( + &mut self, + node: Comprehension, + ) -> Result, Self::Error> { + fold_comprehension(self, node) + } + fn fold_excepthandler( + &mut self, + node: Excepthandler, + ) -> Result, Self::Error> { + fold_excepthandler(self, node) + } + fn fold_arguments( + &mut self, + node: Arguments, + ) -> Result, Self::Error> { + fold_arguments(self, node) + } + fn fold_arg(&mut self, node: Arg) -> Result, Self::Error> { + fold_arg(self, node) + } + fn fold_keyword( + &mut self, + node: Keyword, + ) -> Result, Self::Error> { + fold_keyword(self, node) + } + fn fold_alias(&mut self, node: Alias) -> Result { + fold_alias(self, node) + } + fn fold_withitem( + &mut self, + node: Withitem, + ) -> Result, Self::Error> { + fold_withitem(self, node) + } + fn fold_type_ignore(&mut self, node: TypeIgnore) -> Result { + fold_type_ignore(self, node) + } + } + fn fold_located + ?Sized, T, MT>( + folder: &mut F, + node: Located, + f: impl FnOnce(&mut F, T) -> Result, + ) -> Result, F::Error> { + Ok(Located { + custom: folder.map_user(node.custom)?, + location: node.location, + node: f(folder, node.node)?, + }) + } + impl Foldable for Mod { + type Mapped = Mod; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_mod(self) + } + } + pub fn fold_mod + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Mod, + ) -> Result, F::Error> { + match node { + Mod::Module { 
body, type_ignores } => Ok(Mod::Module { + body: Foldable::fold(body, folder)?, + type_ignores: Foldable::fold(type_ignores, folder)?, + }), + Mod::Interactive { body } => Ok(Mod::Interactive { + body: Foldable::fold(body, folder)?, + }), + Mod::Expression { body } => Ok(Mod::Expression { + body: Foldable::fold(body, folder)?, + }), + Mod::FunctionType { argtypes, returns } => Ok(Mod::FunctionType { + argtypes: Foldable::fold(argtypes, folder)?, + returns: Foldable::fold(returns, folder)?, + }), + } + } + impl Foldable for Stmt { + type Mapped = Stmt; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_stmt(self) + } + } + pub fn fold_stmt + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Stmt, + ) -> Result, F::Error> { + fold_located(folder, node, |folder, node| match node { + StmtKind::FunctionDef { + name, + args, + body, + decorator_list, + returns, + type_comment, + } => Ok(StmtKind::FunctionDef { + name: Foldable::fold(name, folder)?, + args: Foldable::fold(args, folder)?, + body: Foldable::fold(body, folder)?, + decorator_list: Foldable::fold(decorator_list, folder)?, + returns: Foldable::fold(returns, folder)?, + type_comment: Foldable::fold(type_comment, folder)?, + }), + StmtKind::AsyncFunctionDef { + name, + args, + body, + decorator_list, + returns, + type_comment, + } => Ok(StmtKind::AsyncFunctionDef { + name: Foldable::fold(name, folder)?, + args: Foldable::fold(args, folder)?, + body: Foldable::fold(body, folder)?, + decorator_list: Foldable::fold(decorator_list, folder)?, + returns: Foldable::fold(returns, folder)?, + type_comment: Foldable::fold(type_comment, folder)?, + }), + StmtKind::ClassDef { + name, + bases, + keywords, + body, + decorator_list, + } => Ok(StmtKind::ClassDef { + name: Foldable::fold(name, folder)?, + bases: Foldable::fold(bases, folder)?, + keywords: Foldable::fold(keywords, folder)?, + body: Foldable::fold(body, folder)?, + decorator_list: Foldable::fold(decorator_list, folder)?, + }), + 
StmtKind::Return { value } => Ok(StmtKind::Return { + value: Foldable::fold(value, folder)?, + }), + StmtKind::Delete { targets } => Ok(StmtKind::Delete { + targets: Foldable::fold(targets, folder)?, + }), + StmtKind::Assign { + targets, + value, + type_comment, + } => Ok(StmtKind::Assign { + targets: Foldable::fold(targets, folder)?, + value: Foldable::fold(value, folder)?, + type_comment: Foldable::fold(type_comment, folder)?, + }), + StmtKind::AugAssign { target, op, value } => Ok(StmtKind::AugAssign { + target: Foldable::fold(target, folder)?, + op: Foldable::fold(op, folder)?, + value: Foldable::fold(value, folder)?, + }), + StmtKind::AnnAssign { + target, + annotation, + value, + simple, + } => Ok(StmtKind::AnnAssign { + target: Foldable::fold(target, folder)?, + annotation: Foldable::fold(annotation, folder)?, + value: Foldable::fold(value, folder)?, + simple: Foldable::fold(simple, folder)?, + }), + StmtKind::For { + target, + iter, + body, + orelse, + type_comment, + } => Ok(StmtKind::For { + target: Foldable::fold(target, folder)?, + iter: Foldable::fold(iter, folder)?, + body: Foldable::fold(body, folder)?, + orelse: Foldable::fold(orelse, folder)?, + type_comment: Foldable::fold(type_comment, folder)?, + }), + StmtKind::AsyncFor { + target, + iter, + body, + orelse, + type_comment, + } => Ok(StmtKind::AsyncFor { + target: Foldable::fold(target, folder)?, + iter: Foldable::fold(iter, folder)?, + body: Foldable::fold(body, folder)?, + orelse: Foldable::fold(orelse, folder)?, + type_comment: Foldable::fold(type_comment, folder)?, + }), + StmtKind::While { test, body, orelse } => Ok(StmtKind::While { + test: Foldable::fold(test, folder)?, + body: Foldable::fold(body, folder)?, + orelse: Foldable::fold(orelse, folder)?, + }), + StmtKind::If { test, body, orelse } => Ok(StmtKind::If { + test: Foldable::fold(test, folder)?, + body: Foldable::fold(body, folder)?, + orelse: Foldable::fold(orelse, folder)?, + }), + StmtKind::With { + items, + body, + 
type_comment, + } => Ok(StmtKind::With { + items: Foldable::fold(items, folder)?, + body: Foldable::fold(body, folder)?, + type_comment: Foldable::fold(type_comment, folder)?, + }), + StmtKind::AsyncWith { + items, + body, + type_comment, + } => Ok(StmtKind::AsyncWith { + items: Foldable::fold(items, folder)?, + body: Foldable::fold(body, folder)?, + type_comment: Foldable::fold(type_comment, folder)?, + }), + StmtKind::Raise { exc, cause } => Ok(StmtKind::Raise { + exc: Foldable::fold(exc, folder)?, + cause: Foldable::fold(cause, folder)?, + }), + StmtKind::Try { + body, + handlers, + orelse, + finalbody, + } => Ok(StmtKind::Try { + body: Foldable::fold(body, folder)?, + handlers: Foldable::fold(handlers, folder)?, + orelse: Foldable::fold(orelse, folder)?, + finalbody: Foldable::fold(finalbody, folder)?, + }), + StmtKind::Assert { test, msg } => Ok(StmtKind::Assert { + test: Foldable::fold(test, folder)?, + msg: Foldable::fold(msg, folder)?, + }), + StmtKind::Import { names } => Ok(StmtKind::Import { + names: Foldable::fold(names, folder)?, + }), + StmtKind::ImportFrom { + module, + names, + level, + } => Ok(StmtKind::ImportFrom { + module: Foldable::fold(module, folder)?, + names: Foldable::fold(names, folder)?, + level: Foldable::fold(level, folder)?, + }), + StmtKind::Global { names } => Ok(StmtKind::Global { + names: Foldable::fold(names, folder)?, + }), + StmtKind::Nonlocal { names } => Ok(StmtKind::Nonlocal { + names: Foldable::fold(names, folder)?, + }), + StmtKind::Expr { value } => Ok(StmtKind::Expr { + value: Foldable::fold(value, folder)?, + }), + StmtKind::Pass {} => Ok(StmtKind::Pass {}), + StmtKind::Break {} => Ok(StmtKind::Break {}), + StmtKind::Continue {} => Ok(StmtKind::Continue {}), + }) + } + impl Foldable for Expr { + type Mapped = Expr; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_expr(self) + } + } + pub fn fold_expr + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Expr, + ) -> Result, F::Error> { + 
fold_located(folder, node, |folder, node| match node { + ExprKind::BoolOp { op, values } => Ok(ExprKind::BoolOp { + op: Foldable::fold(op, folder)?, + values: Foldable::fold(values, folder)?, + }), + ExprKind::NamedExpr { target, value } => Ok(ExprKind::NamedExpr { + target: Foldable::fold(target, folder)?, + value: Foldable::fold(value, folder)?, + }), + ExprKind::BinOp { left, op, right } => Ok(ExprKind::BinOp { + left: Foldable::fold(left, folder)?, + op: Foldable::fold(op, folder)?, + right: Foldable::fold(right, folder)?, + }), + ExprKind::UnaryOp { op, operand } => Ok(ExprKind::UnaryOp { + op: Foldable::fold(op, folder)?, + operand: Foldable::fold(operand, folder)?, + }), + ExprKind::Lambda { args, body } => Ok(ExprKind::Lambda { + args: Foldable::fold(args, folder)?, + body: Foldable::fold(body, folder)?, + }), + ExprKind::IfExp { test, body, orelse } => Ok(ExprKind::IfExp { + test: Foldable::fold(test, folder)?, + body: Foldable::fold(body, folder)?, + orelse: Foldable::fold(orelse, folder)?, + }), + ExprKind::Dict { keys, values } => Ok(ExprKind::Dict { + keys: Foldable::fold(keys, folder)?, + values: Foldable::fold(values, folder)?, + }), + ExprKind::Set { elts } => Ok(ExprKind::Set { + elts: Foldable::fold(elts, folder)?, + }), + ExprKind::ListComp { elt, generators } => Ok(ExprKind::ListComp { + elt: Foldable::fold(elt, folder)?, + generators: Foldable::fold(generators, folder)?, + }), + ExprKind::SetComp { elt, generators } => Ok(ExprKind::SetComp { + elt: Foldable::fold(elt, folder)?, + generators: Foldable::fold(generators, folder)?, + }), + ExprKind::DictComp { + key, + value, + generators, + } => Ok(ExprKind::DictComp { + key: Foldable::fold(key, folder)?, + value: Foldable::fold(value, folder)?, + generators: Foldable::fold(generators, folder)?, + }), + ExprKind::GeneratorExp { elt, generators } => Ok(ExprKind::GeneratorExp { + elt: Foldable::fold(elt, folder)?, + generators: Foldable::fold(generators, folder)?, + }), + ExprKind::Await { value } 
=> Ok(ExprKind::Await { + value: Foldable::fold(value, folder)?, + }), + ExprKind::Yield { value } => Ok(ExprKind::Yield { + value: Foldable::fold(value, folder)?, + }), + ExprKind::YieldFrom { value } => Ok(ExprKind::YieldFrom { + value: Foldable::fold(value, folder)?, + }), + ExprKind::Compare { + left, + ops, + comparators, + } => Ok(ExprKind::Compare { + left: Foldable::fold(left, folder)?, + ops: Foldable::fold(ops, folder)?, + comparators: Foldable::fold(comparators, folder)?, + }), + ExprKind::Call { + func, + args, + keywords, + } => Ok(ExprKind::Call { + func: Foldable::fold(func, folder)?, + args: Foldable::fold(args, folder)?, + keywords: Foldable::fold(keywords, folder)?, + }), + ExprKind::FormattedValue { + value, + conversion, + format_spec, + } => Ok(ExprKind::FormattedValue { + value: Foldable::fold(value, folder)?, + conversion: Foldable::fold(conversion, folder)?, + format_spec: Foldable::fold(format_spec, folder)?, + }), + ExprKind::JoinedStr { values } => Ok(ExprKind::JoinedStr { + values: Foldable::fold(values, folder)?, + }), + ExprKind::Constant { value, kind } => Ok(ExprKind::Constant { + value: Foldable::fold(value, folder)?, + kind: Foldable::fold(kind, folder)?, + }), + ExprKind::Attribute { value, attr, ctx } => Ok(ExprKind::Attribute { + value: Foldable::fold(value, folder)?, + attr: Foldable::fold(attr, folder)?, + ctx: Foldable::fold(ctx, folder)?, + }), + ExprKind::Subscript { value, slice, ctx } => Ok(ExprKind::Subscript { + value: Foldable::fold(value, folder)?, + slice: Foldable::fold(slice, folder)?, + ctx: Foldable::fold(ctx, folder)?, + }), + ExprKind::Starred { value, ctx } => Ok(ExprKind::Starred { + value: Foldable::fold(value, folder)?, + ctx: Foldable::fold(ctx, folder)?, + }), + ExprKind::Name { id, ctx } => Ok(ExprKind::Name { + id: Foldable::fold(id, folder)?, + ctx: Foldable::fold(ctx, folder)?, + }), + ExprKind::List { elts, ctx } => Ok(ExprKind::List { + elts: Foldable::fold(elts, folder)?, + ctx: Foldable::fold(ctx, 
folder)?, + }), + ExprKind::Tuple { elts, ctx } => Ok(ExprKind::Tuple { + elts: Foldable::fold(elts, folder)?, + ctx: Foldable::fold(ctx, folder)?, + }), + ExprKind::Slice { lower, upper, step } => Ok(ExprKind::Slice { + lower: Foldable::fold(lower, folder)?, + upper: Foldable::fold(upper, folder)?, + step: Foldable::fold(step, folder)?, + }), + }) + } + impl Foldable for ExprContext { + type Mapped = ExprContext; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_expr_context(self) + } + } + pub fn fold_expr_context + ?Sized>( + #[allow(unused)] folder: &mut F, + node: ExprContext, + ) -> Result { + match node { + ExprContext::Load {} => Ok(ExprContext::Load {}), + ExprContext::Store {} => Ok(ExprContext::Store {}), + ExprContext::Del {} => Ok(ExprContext::Del {}), + } + } + impl Foldable for Boolop { + type Mapped = Boolop; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_boolop(self) + } + } + pub fn fold_boolop + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Boolop, + ) -> Result { + match node { + Boolop::And {} => Ok(Boolop::And {}), + Boolop::Or {} => Ok(Boolop::Or {}), + } + } + impl Foldable for Operator { + type Mapped = Operator; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_operator(self) + } + } + pub fn fold_operator + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Operator, + ) -> Result { + match node { + Operator::Add {} => Ok(Operator::Add {}), + Operator::Sub {} => Ok(Operator::Sub {}), + Operator::Mult {} => Ok(Operator::Mult {}), + Operator::MatMult {} => Ok(Operator::MatMult {}), + Operator::Div {} => Ok(Operator::Div {}), + Operator::Mod {} => Ok(Operator::Mod {}), + Operator::Pow {} => Ok(Operator::Pow {}), + Operator::LShift {} => Ok(Operator::LShift {}), + Operator::RShift {} => Ok(Operator::RShift {}), + Operator::BitOr {} => Ok(Operator::BitOr {}), + Operator::BitXor {} => Ok(Operator::BitXor {}), + Operator::BitAnd {} => Ok(Operator::BitAnd 
{}), + Operator::FloorDiv {} => Ok(Operator::FloorDiv {}), + } + } + impl Foldable for Unaryop { + type Mapped = Unaryop; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_unaryop(self) + } + } + pub fn fold_unaryop + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Unaryop, + ) -> Result { + match node { + Unaryop::Invert {} => Ok(Unaryop::Invert {}), + Unaryop::Not {} => Ok(Unaryop::Not {}), + Unaryop::UAdd {} => Ok(Unaryop::UAdd {}), + Unaryop::USub {} => Ok(Unaryop::USub {}), + } + } + impl Foldable for Cmpop { + type Mapped = Cmpop; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_cmpop(self) + } + } + pub fn fold_cmpop + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Cmpop, + ) -> Result { + match node { + Cmpop::Eq {} => Ok(Cmpop::Eq {}), + Cmpop::NotEq {} => Ok(Cmpop::NotEq {}), + Cmpop::Lt {} => Ok(Cmpop::Lt {}), + Cmpop::LtE {} => Ok(Cmpop::LtE {}), + Cmpop::Gt {} => Ok(Cmpop::Gt {}), + Cmpop::GtE {} => Ok(Cmpop::GtE {}), + Cmpop::Is {} => Ok(Cmpop::Is {}), + Cmpop::IsNot {} => Ok(Cmpop::IsNot {}), + Cmpop::In {} => Ok(Cmpop::In {}), + Cmpop::NotIn {} => Ok(Cmpop::NotIn {}), + } + } + impl Foldable for Comprehension { + type Mapped = Comprehension; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_comprehension(self) + } + } + pub fn fold_comprehension + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Comprehension, + ) -> Result, F::Error> { + let Comprehension { + target, + iter, + ifs, + is_async, + } = node; + Ok(Comprehension { + target: Foldable::fold(target, folder)?, + iter: Foldable::fold(iter, folder)?, + ifs: Foldable::fold(ifs, folder)?, + is_async: Foldable::fold(is_async, folder)?, + }) + } + impl Foldable for Excepthandler { + type Mapped = Excepthandler; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_excepthandler(self) + } + } + pub fn fold_excepthandler + ?Sized>( + #[allow(unused)] folder: &mut F, + node: 
Excepthandler, + ) -> Result, F::Error> { + fold_located(folder, node, |folder, node| match node { + ExcepthandlerKind::ExceptHandler { type_, name, body } => { + Ok(ExcepthandlerKind::ExceptHandler { + type_: Foldable::fold(type_, folder)?, + name: Foldable::fold(name, folder)?, + body: Foldable::fold(body, folder)?, + }) + } + }) + } + impl Foldable for Arguments { + type Mapped = Arguments; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_arguments(self) + } + } + pub fn fold_arguments + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Arguments, + ) -> Result, F::Error> { + let Arguments { + posonlyargs, + args, + vararg, + kwonlyargs, + kw_defaults, + kwarg, + defaults, + } = node; + Ok(Arguments { + posonlyargs: Foldable::fold(posonlyargs, folder)?, + args: Foldable::fold(args, folder)?, + vararg: Foldable::fold(vararg, folder)?, + kwonlyargs: Foldable::fold(kwonlyargs, folder)?, + kw_defaults: Foldable::fold(kw_defaults, folder)?, + kwarg: Foldable::fold(kwarg, folder)?, + defaults: Foldable::fold(defaults, folder)?, + }) + } + impl Foldable for Arg { + type Mapped = Arg; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_arg(self) + } + } + pub fn fold_arg + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Arg, + ) -> Result, F::Error> { + fold_located(folder, node, |folder, node| { + let ArgData { + arg, + annotation, + type_comment, + } = node; + Ok(ArgData { + arg: Foldable::fold(arg, folder)?, + annotation: Foldable::fold(annotation, folder)?, + type_comment: Foldable::fold(type_comment, folder)?, + }) + }) + } + impl Foldable for Keyword { + type Mapped = Keyword; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_keyword(self) + } + } + pub fn fold_keyword + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Keyword, + ) -> Result, F::Error> { + fold_located(folder, node, |folder, node| { + let KeywordData { arg, value } = node; + Ok(KeywordData { + arg: 
Foldable::fold(arg, folder)?, + value: Foldable::fold(value, folder)?, + }) + }) + } + impl Foldable for Alias { + type Mapped = Alias; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_alias(self) + } + } + pub fn fold_alias + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Alias, + ) -> Result { + let Alias { name, asname } = node; + Ok(Alias { + name: Foldable::fold(name, folder)?, + asname: Foldable::fold(asname, folder)?, + }) + } + impl Foldable for Withitem { + type Mapped = Withitem; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_withitem(self) + } + } + pub fn fold_withitem + ?Sized>( + #[allow(unused)] folder: &mut F, + node: Withitem, + ) -> Result, F::Error> { + let Withitem { + context_expr, + optional_vars, + } = node; + Ok(Withitem { + context_expr: Foldable::fold(context_expr, folder)?, + optional_vars: Foldable::fold(optional_vars, folder)?, + }) + } + impl Foldable for TypeIgnore { + type Mapped = TypeIgnore; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + folder.fold_type_ignore(self) + } + } + pub fn fold_type_ignore + ?Sized>( + #[allow(unused)] folder: &mut F, + node: TypeIgnore, + ) -> Result { + match node { + TypeIgnore::TypeIgnore { lineno, tag } => Ok(TypeIgnore::TypeIgnore { + lineno: Foldable::fold(lineno, folder)?, + tag: Foldable::fold(tag, folder)?, + }), + } + } +} diff --git a/nac3ast/src/constant.rs b/nac3ast/src/constant.rs new file mode 100644 index 00000000..0a627783 --- /dev/null +++ b/nac3ast/src/constant.rs @@ -0,0 +1,210 @@ +use num_bigint::BigInt; + +#[derive(Debug, PartialEq)] +pub enum Constant { + None, + Bool(bool), + Str(String), + Bytes(Vec), + Int(BigInt), + Tuple(Vec), + Float(f64), + Complex { real: f64, imag: f64 }, + Ellipsis, +} + +impl From for Constant { + fn from(s: String) -> Constant { + Self::Str(s) + } +} +impl From> for Constant { + fn from(b: Vec) -> Constant { + Self::Bytes(b) + } +} +impl From for Constant { + fn from(b: 
bool) -> Constant { + Self::Bool(b) + } +} +impl From for Constant { + fn from(i: BigInt) -> Constant { + Self::Int(i) + } +} + +/// Transforms a value prior to formatting it. +#[derive(Copy, Clone, Debug, PartialEq)] +#[repr(u8)] +pub enum ConversionFlag { + /// Converts by calling `str()`. + Str = b's', + /// Converts by calling `ascii()`. + Ascii = b'a', + /// Converts by calling `repr()`. + Repr = b'r', +} + +impl ConversionFlag { + pub fn try_from_byte(b: u8) -> Option { + match b { + b's' => Some(Self::Str), + b'a' => Some(Self::Ascii), + b'r' => Some(Self::Repr), + _ => None, + } + } +} + +#[cfg(feature = "constant-optimization")] +#[derive(Default)] +pub struct ConstantOptimizer { + _priv: (), +} + +#[cfg(feature = "constant-optimization")] +impl ConstantOptimizer { + #[inline] + pub fn new() -> Self { + Self { _priv: () } + } +} + +#[cfg(feature = "constant-optimization")] +impl crate::fold::Fold for ConstantOptimizer { + type TargetU = U; + type Error = std::convert::Infallible; + #[inline] + fn map_user(&mut self, user: U) -> Result { + Ok(user) + } + fn fold_expr(&mut self, node: crate::Expr) -> Result, Self::Error> { + match node.node { + crate::ExprKind::Tuple { elts, ctx } => { + let elts = elts + .into_iter() + .map(|x| self.fold_expr(x)) + .collect::, _>>()?; + let expr = if elts + .iter() + .all(|e| matches!(e.node, crate::ExprKind::Constant { .. })) + { + let tuple = elts + .into_iter() + .map(|e| match e.node { + crate::ExprKind::Constant { value, .. 
} => value, + _ => unreachable!(), + }) + .collect(); + crate::ExprKind::Constant { + value: Constant::Tuple(tuple), + kind: None, + } + } else { + crate::ExprKind::Tuple { elts, ctx } + }; + Ok(crate::Expr { + node: expr, + custom: node.custom, + location: node.location, + }) + } + _ => crate::fold::fold_expr(self, node), + } + } +} + +#[cfg(test)] +mod tests { + #[cfg(feature = "constant-optimization")] + #[test] + fn test_constant_opt() { + use super::*; + use crate::fold::Fold; + use crate::*; + + let location = Location::new(0, 0); + let custom = (); + let ast = Located { + location, + custom, + node: ExprKind::Tuple { + ctx: ExprContext::Load, + elts: vec![ + Located { + location, + custom, + node: ExprKind::Constant { + value: BigInt::from(1).into(), + kind: None, + }, + }, + Located { + location, + custom, + node: ExprKind::Constant { + value: BigInt::from(2).into(), + kind: None, + }, + }, + Located { + location, + custom, + node: ExprKind::Tuple { + ctx: ExprContext::Load, + elts: vec![ + Located { + location, + custom, + node: ExprKind::Constant { + value: BigInt::from(3).into(), + kind: None, + }, + }, + Located { + location, + custom, + node: ExprKind::Constant { + value: BigInt::from(4).into(), + kind: None, + }, + }, + Located { + location, + custom, + node: ExprKind::Constant { + value: BigInt::from(5).into(), + kind: None, + }, + }, + ], + }, + }, + ], + }, + }; + let new_ast = ConstantOptimizer::new() + .fold_expr(ast) + .unwrap_or_else(|e| match e {}); + assert_eq!( + new_ast, + Located { + location, + custom, + node: ExprKind::Constant { + value: Constant::Tuple(vec![ + BigInt::from(1).into(), + BigInt::from(2).into(), + Constant::Tuple(vec![ + BigInt::from(3).into(), + BigInt::from(4).into(), + BigInt::from(5).into(), + ]) + ]), + kind: None + }, + } + ); + } +} diff --git a/nac3ast/src/fold_helpers.rs b/nac3ast/src/fold_helpers.rs new file mode 100644 index 00000000..26a08b2f --- /dev/null +++ b/nac3ast/src/fold_helpers.rs @@ -0,0 +1,72 @@ 
+use crate::constant; +use crate::fold::Fold; + +pub(crate) trait Foldable { + type Mapped; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result; +} + +impl Foldable for Vec +where + X: Foldable, +{ + type Mapped = Vec; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + self.into_iter().map(|x| x.fold(folder)).collect() + } +} + +impl Foldable for Option +where + X: Foldable, +{ + type Mapped = Option; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + self.map(|x| x.fold(folder)).transpose() + } +} + +impl Foldable for Box +where + X: Foldable, +{ + type Mapped = Box; + fn fold + ?Sized>( + self, + folder: &mut F, + ) -> Result { + (*self).fold(folder).map(Box::new) + } +} + +macro_rules! simple_fold { + ($($t:ty),+$(,)?) => { + $(impl $crate::fold_helpers::Foldable for $t { + type Mapped = Self; + #[inline] + fn fold + ?Sized>( + self, + _folder: &mut F, + ) -> Result { + Ok(self) + } + })+ + }; +} + +simple_fold!( + usize, + String, + bool, + constant::Constant, + constant::ConversionFlag +); diff --git a/nac3ast/src/impls.rs b/nac3ast/src/impls.rs new file mode 100644 index 00000000..666acd1f --- /dev/null +++ b/nac3ast/src/impls.rs @@ -0,0 +1,53 @@ +use crate::{Constant, ExprKind}; + +impl ExprKind { + /// Returns a short name for the node suitable for use in error messages. + pub fn name(&self) -> &'static str { + match self { + ExprKind::BoolOp { .. } | ExprKind::BinOp { .. } | ExprKind::UnaryOp { .. } => { + "operator" + } + ExprKind::Subscript { .. } => "subscript", + ExprKind::Await { .. } => "await expression", + ExprKind::Yield { .. } | ExprKind::YieldFrom { .. } => "yield expression", + ExprKind::Compare { .. } => "comparison", + ExprKind::Attribute { .. } => "attribute", + ExprKind::Call { .. } => "function call", + ExprKind::Constant { value, .. } => match value { + Constant::Str(_) + | Constant::Int(_) + | Constant::Float(_) + | Constant::Complex { .. 
} + | Constant::Bytes(_) => "literal", + Constant::Tuple(_) => "tuple", + Constant::Bool(_) | Constant::None => "keyword", + Constant::Ellipsis => "ellipsis", + }, + ExprKind::List { .. } => "list", + ExprKind::Tuple { .. } => "tuple", + ExprKind::Dict { .. } => "dict display", + ExprKind::Set { .. } => "set display", + ExprKind::ListComp { .. } => "list comprehension", + ExprKind::DictComp { .. } => "dict comprehension", + ExprKind::SetComp { .. } => "set comprehension", + ExprKind::GeneratorExp { .. } => "generator expression", + ExprKind::Starred { .. } => "starred", + ExprKind::Slice { .. } => "slice", + ExprKind::JoinedStr { values } => { + if values + .iter() + .any(|e| matches!(e.node, ExprKind::JoinedStr { .. })) + { + "f-string expression" + } else { + "literal" + } + } + ExprKind::FormattedValue { .. } => "f-string expression", + ExprKind::Name { .. } => "name", + ExprKind::Lambda { .. } => "lambda", + ExprKind::IfExp { .. } => "conditional expression", + ExprKind::NamedExpr { .. } => "named expression", + } + } +} diff --git a/nac3ast/src/lib.rs b/nac3ast/src/lib.rs new file mode 100644 index 00000000..83e86231 --- /dev/null +++ b/nac3ast/src/lib.rs @@ -0,0 +1,11 @@ +mod ast_gen; +mod constant; +#[cfg(feature = "fold")] +mod fold_helpers; +mod impls; +mod location; + +pub use ast_gen::*; +pub use location::Location; + +pub type Suite = Vec>; diff --git a/nac3ast/src/location.rs b/nac3ast/src/location.rs new file mode 100644 index 00000000..324c2a33 --- /dev/null +++ b/nac3ast/src/location.rs @@ -0,0 +1,79 @@ +//! Datatypes to support source location information. + +use std::fmt; + +/// A location somewhere in the sourcecode. 
+#[derive(Clone, Copy, Debug, Default, PartialEq)] +pub struct Location { + row: usize, + column: usize, +} + +impl fmt::Display for Location { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "line {} column {}", self.row, self.column) + } +} + +impl Location { + pub fn visualize<'a>( + &self, + line: &'a str, + desc: impl fmt::Display + 'a, + ) -> impl fmt::Display + 'a { + struct Visualize<'a, D: fmt::Display> { + loc: Location, + line: &'a str, + desc: D, + } + impl fmt::Display for Visualize<'_, D> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}\n{}\n{arrow:>pad$}", + self.desc, + self.line, + pad = self.loc.column, + arrow = "↑", + ) + } + } + Visualize { + loc: *self, + line, + desc, + } + } +} + +impl Location { + pub fn new(row: usize, column: usize) -> Self { + Location { row, column } + } + + pub fn row(&self) -> usize { + self.row + } + + pub fn column(&self) -> usize { + self.column + } + + pub fn reset(&mut self) { + self.row = 1; + self.column = 1; + } + + pub fn go_right(&mut self) { + self.column += 1; + } + + pub fn go_left(&mut self) { + self.column -= 1; + } + + pub fn newline(&mut self) { + self.row += 1; + self.column = 1; + } +} diff --git a/nac3parser/Cargo.toml b/nac3parser/Cargo.toml new file mode 100644 index 00000000..b938b3a6 --- /dev/null +++ b/nac3parser/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "rustpython-parser" +version = "0.1.2" +description = "Parser for python code." 
+authors = [ "RustPython Team" ] +build = "build.rs" +repository = "https://github.com/RustPython/RustPython" +license = "MIT" +edition = "2018" + +[build-dependencies] +lalrpop = "0.19.6" + +[dependencies] +rustpython-ast = { path = "../ast" } +lalrpop-util = "0.19.6" +log = "0.4.1" +num-bigint = "0.4.0" +num-traits = "0.2" +unic-emoji-char = "0.9" +unic-ucd-ident = "0.9" +unicode_names2 = "0.4" +phf = { version = "0.9", features = ["macros"] } +ahash = "0.7.2" + +[dev-dependencies] +insta = "1.5" diff --git a/nac3parser/README.md b/nac3parser/README.md new file mode 100644 index 00000000..ebe9eb68 --- /dev/null +++ b/nac3parser/README.md @@ -0,0 +1,66 @@ +# RustPython/parser + +This directory has the code for python lexing, parsing and generating Abstract Syntax Trees (AST). + +The steps are: +- Lexical analysis: splits the source code into tokens. +- Parsing and generating the AST: transforms those tokens into an AST. Uses `LALRPOP`, a Rust parser generator framework. + +This crate is published on [https://docs.rs/rustpython-parser](https://docs.rs/rustpython-parser). + +We wrote [a blog post](https://rustpython.github.io/2020/04/02/thing-explainer-parser.html) with screenshots and an explanation to help you understand the steps by seeing them in action. + +For more information on LALRPOP, here is a link to the [LALRPOP book](https://github.com/lalrpop/lalrpop). + +There is a readme in the `src` folder with the details of each file. + + +## Directory content + +`build.rs`: The build script. +`Cargo.toml`: The config file. + +The `src` directory has: + +**lib.rs** +This is the crate's root. + +**lexer.rs** +This module takes care of lexing python source text. This means source code is translated into separate tokens. + +**parser.rs** +A python parsing module. Use this module to parse python code into an AST. There are three ways to parse python code. You could parse a whole program, a single statement, or a single expression. 
+ +**ast.rs** + Implements abstract syntax tree (AST) nodes for the python language. Roughly equivalent to [the python AST](https://docs.python.org/3/library/ast.html). + +**python.lalrpop** +Python grammar. + +**token.rs** +Different token definitions. Loosely based on token.h from CPython source. + +**error.rs** +Define internal parse error types. The goal is to provide a matching and a safe error API, masking errors from LALR. + +**fstring.rs** +Format strings. + +**function.rs** +Collection of functions for parsing parameters, arguments. + +**location.rs** +Datatypes to support source location information. + +**mode.rs** +Execution mode check. Allowed modes are `exec`, `eval` or `single`. + + +## How to use + +For example, one could do this: +``` + use rustpython_parser::{parser, ast}; + let python_source = "print('Hello world')"; + let python_ast = parser::parse_expression(python_source).unwrap(); +``` diff --git a/nac3parser/build.rs b/nac3parser/build.rs new file mode 100644 index 00000000..0bfc7927 --- /dev/null +++ b/nac3parser/build.rs @@ -0,0 +1,3 @@ +fn main() { + lalrpop::process_root().unwrap() +} diff --git a/nac3parser/src/error.rs b/nac3parser/src/error.rs new file mode 100644 index 00000000..21497c45 --- /dev/null +++ b/nac3parser/src/error.rs @@ -0,0 +1,239 @@ +//! Define internal parse error types +//! The goal is to provide a matching and a safe error API, masking errors from LALR +use lalrpop_util::ParseError as LalrpopError; + +use crate::ast::Location; +use crate::token::Tok; + +use std::error::Error; +use std::fmt; + +/// Represents an error during lexical scanning. 
+#[derive(Debug, PartialEq)] +pub struct LexicalError { + pub error: LexicalErrorType, + pub location: Location, +} + +#[derive(Debug, PartialEq)] +pub enum LexicalErrorType { + StringError, + UnicodeError, + NestingError, + IndentationError, + TabError, + TabsAfterSpaces, + DefaultArgumentError, + PositionalArgumentError, + DuplicateKeywordArgumentError, + UnrecognizedToken { tok: char }, + FStringError(FStringErrorType), + LineContinuationError, + Eof, + OtherError(String), +} + +impl fmt::Display for LexicalErrorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + LexicalErrorType::StringError => write!(f, "Got unexpected string"), + LexicalErrorType::FStringError(error) => write!(f, "Got error in f-string: {}", error), + LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"), + LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"), + LexicalErrorType::IndentationError => { + write!(f, "unindent does not match any outer indentation level") + } + LexicalErrorType::TabError => { + write!(f, "inconsistent use of tabs and spaces in indentation") + } + LexicalErrorType::TabsAfterSpaces => { + write!(f, "Tabs not allowed as part of indentation after spaces") + } + LexicalErrorType::DefaultArgumentError => { + write!(f, "non-default argument follows default argument") + } + LexicalErrorType::DuplicateKeywordArgumentError => { + write!(f, "keyword argument repeated") + } + LexicalErrorType::PositionalArgumentError => { + write!(f, "positional argument follows keyword argument") + } + LexicalErrorType::UnrecognizedToken { tok } => { + write!(f, "Got unexpected token {}", tok) + } + LexicalErrorType::LineContinuationError => { + write!(f, "unexpected character after line continuation character") + } + LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"), + LexicalErrorType::OtherError(msg) => write!(f, "{}", msg), + } + } +} + +// TODO: consolidate these with ParseError +#[derive(Debug, PartialEq)] 
+pub struct FStringError { + pub error: FStringErrorType, + pub location: Location, +} + +#[derive(Debug, PartialEq)] +pub enum FStringErrorType { + UnclosedLbrace, + UnopenedRbrace, + ExpectedRbrace, + InvalidExpression(Box), + InvalidConversionFlag, + EmptyExpression, + MismatchedDelimiter, + ExpressionNestedTooDeeply, +} + +impl fmt::Display for FStringErrorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FStringErrorType::UnclosedLbrace => write!(f, "Unclosed '{{'"), + FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"), + FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."), + FStringErrorType::InvalidExpression(error) => { + write!(f, "Invalid expression: {}", error) + } + FStringErrorType::InvalidConversionFlag => write!(f, "Invalid conversion flag"), + FStringErrorType::EmptyExpression => write!(f, "Empty expression"), + FStringErrorType::MismatchedDelimiter => write!(f, "Mismatched delimiter"), + FStringErrorType::ExpressionNestedTooDeeply => { + write!(f, "expressions nested too deeply") + } + } + } +} + +impl From for LalrpopError { + fn from(err: FStringError) -> Self { + lalrpop_util::ParseError::User { + error: LexicalError { + error: LexicalErrorType::FStringError(err.error), + location: err.location, + }, + } + } +} + +/// Represents an error during parsing +#[derive(Debug, PartialEq)] +pub struct ParseError { + pub error: ParseErrorType, + pub location: Location, +} + +#[derive(Debug, PartialEq)] +pub enum ParseErrorType { + /// Parser encountered an unexpected end of input + Eof, + /// Parser encountered an extra token + ExtraToken(Tok), + /// Parser encountered an invalid token + InvalidToken, + /// Parser encountered an unexpected token + UnrecognizedToken(Tok, Option), + /// Maps to `User` type from `lalrpop-util` + Lexical(LexicalErrorType), +} + +/// Convert `lalrpop_util::ParseError` to our internal type +impl From> for ParseError { + fn from(err: LalrpopError) -> 
Self { + match err { + // TODO: Are there cases where this isn't an EOF? + LalrpopError::InvalidToken { location } => ParseError { + error: ParseErrorType::Eof, + location, + }, + LalrpopError::ExtraToken { token } => ParseError { + error: ParseErrorType::ExtraToken(token.1), + location: token.0, + }, + LalrpopError::User { error } => ParseError { + error: ParseErrorType::Lexical(error.error), + location: error.location, + }, + LalrpopError::UnrecognizedToken { token, expected } => { + // Hacky, but it's how CPython does it. See PyParser_AddToken, + // in particular "Only one possible expected token" comment. + let expected = if expected.len() == 1 { + Some(expected[0].clone()) + } else { + None + }; + ParseError { + error: ParseErrorType::UnrecognizedToken(token.1, expected), + location: token.0, + } + } + LalrpopError::UnrecognizedEOF { location, .. } => ParseError { + error: ParseErrorType::Eof, + location, + }, + } + } +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} at {}", self.error, self.location) + } +} + +impl fmt::Display for ParseErrorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + ParseErrorType::Eof => write!(f, "Got unexpected EOF"), + ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {:?}", tok), + ParseErrorType::InvalidToken => write!(f, "Got invalid token"), + ParseErrorType::UnrecognizedToken(ref tok, ref expected) => { + if *tok == Tok::Indent { + write!(f, "unexpected indent") + } else if expected.as_deref() == Some("Indent") { + write!(f, "expected an indented block") + } else { + write!(f, "Got unexpected token {}", tok) + } + } + ParseErrorType::Lexical(ref error) => write!(f, "{}", error), + } + } +} + +impl Error for ParseErrorType {} + +impl ParseErrorType { + pub fn is_indentation_error(&self) -> bool { + match self { + ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true, + 
ParseErrorType::UnrecognizedToken(token, expected) => { + *token == Tok::Indent || expected.clone() == Some("Indent".to_owned()) + } + _ => false, + } + } + pub fn is_tab_error(&self) -> bool { + matches!( + self, + ParseErrorType::Lexical(LexicalErrorType::TabError) + | ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces) + ) + } +} + +impl std::ops::Deref for ParseError { + type Target = ParseErrorType; + fn deref(&self) -> &Self::Target { + &self.error + } +} + +impl Error for ParseError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + None + } +} diff --git a/nac3parser/src/fstring.rs b/nac3parser/src/fstring.rs new file mode 100644 index 00000000..6910ddc9 --- /dev/null +++ b/nac3parser/src/fstring.rs @@ -0,0 +1,405 @@ +use std::iter; +use std::mem; +use std::str; + +use crate::ast::{Constant, ConversionFlag, Expr, ExprKind, Location}; +use crate::error::{FStringError, FStringErrorType, ParseError}; +use crate::parser::parse_expression; + +use self::FStringErrorType::*; + +struct FStringParser<'a> { + chars: iter::Peekable>, + str_location: Location, +} + +impl<'a> FStringParser<'a> { + fn new(source: &'a str, str_location: Location) -> Self { + Self { + chars: source.chars().peekable(), + str_location, + } + } + + #[inline] + fn expr(&self, node: ExprKind) -> Expr { + Expr::new(self.str_location, node) + } + + fn parse_formatted_value(&mut self) -> Result, FStringErrorType> { + let mut expression = String::new(); + let mut spec = None; + let mut delims = Vec::new(); + let mut conversion = None; + let mut pred_expression_text = String::new(); + let mut trailing_seq = String::new(); + + while let Some(ch) = self.chars.next() { + match ch { + // This could be integrated better with the remaining code, but it is fine as a starting point. + // In general, tokenizing the f-string up front would avoid this peeking. + '!' 
if self.chars.peek() == Some(&'=') => { + expression.push_str("!="); + self.chars.next(); + } + + '=' if self.chars.peek() == Some(&'=') => { + expression.push_str("=="); + self.chars.next(); + } + + '>' if self.chars.peek() == Some(&'=') => { + expression.push_str(">="); + self.chars.next(); + } + + '<' if self.chars.peek() == Some(&'=') => { + expression.push_str("<="); + self.chars.next(); + } + + '!' if delims.is_empty() && self.chars.peek() != Some(&'=') => { + if expression.trim().is_empty() { + return Err(EmptyExpression); + } + + conversion = Some(match self.chars.next() { + Some('s') => ConversionFlag::Str, + Some('a') => ConversionFlag::Ascii, + Some('r') => ConversionFlag::Repr, + Some(_) => { + return Err(InvalidConversionFlag); + } + None => { + return Err(ExpectedRbrace); + } + }); + + if let Some(&peek) = self.chars.peek() { + if peek != '}' && peek != ':' { + return Err(ExpectedRbrace); + } + } else { + return Err(ExpectedRbrace); + } + } + + // match a python 3.8 self documenting expression + // format '{' PYTHON_EXPRESSION '=' FORMAT_SPECIFIER? 
'}' + '=' if self.chars.peek() != Some(&'=') && delims.is_empty() => { + pred_expression_text = expression.to_string(); // safe expression before = to print it + } + + ':' if delims.is_empty() => { + let mut nested = false; + let mut in_nested = false; + let mut spec_expression = String::new(); + while let Some(&next) = self.chars.peek() { + match next { + '{' => { + if in_nested { + return Err(ExpressionNestedTooDeeply); + } + in_nested = true; + nested = true; + self.chars.next(); + continue; + } + '}' => { + if in_nested { + in_nested = false; + self.chars.next(); + } + break; + } + _ => (), + } + spec_expression.push(next); + self.chars.next(); + } + if in_nested { + return Err(UnclosedLbrace); + } + spec = Some(if nested { + Box::new( + self.expr(ExprKind::FormattedValue { + value: Box::new( + parse_fstring_expr(&spec_expression) + .map_err(|e| InvalidExpression(Box::new(e.error)))?, + ), + conversion: None, + format_spec: None, + }), + ) + } else { + Box::new(self.expr(ExprKind::Constant { + value: spec_expression.to_owned().into(), + kind: None, + })) + }) + } + '(' | '{' | '[' => { + expression.push(ch); + delims.push(ch); + } + ')' => { + if delims.pop() != Some('(') { + return Err(MismatchedDelimiter); + } + expression.push(ch); + } + ']' => { + if delims.pop() != Some('[') { + return Err(MismatchedDelimiter); + } + expression.push(ch); + } + '}' if !delims.is_empty() => { + if delims.pop() != Some('{') { + return Err(MismatchedDelimiter); + } + expression.push(ch); + } + '}' => { + if expression.is_empty() { + return Err(EmptyExpression); + } + let ret = if pred_expression_text.is_empty() { + vec![self.expr(ExprKind::FormattedValue { + value: Box::new( + parse_fstring_expr(&expression) + .map_err(|e| InvalidExpression(Box::new(e.error)))?, + ), + conversion, + format_spec: spec, + })] + } else { + vec![ + self.expr(ExprKind::Constant { + value: Constant::Str(pred_expression_text + "="), + kind: None, + }), + self.expr(ExprKind::Constant { + value: 
trailing_seq.into(), + kind: None, + }), + self.expr(ExprKind::FormattedValue { + value: Box::new( + parse_fstring_expr(&expression) + .map_err(|e| InvalidExpression(Box::new(e.error)))?, + ), + conversion, + format_spec: spec, + }), + ] + }; + return Ok(ret); + } + '"' | '\'' => { + expression.push(ch); + for next in &mut self.chars { + expression.push(next); + if next == ch { + break; + } + } + } + ' ' if !pred_expression_text.is_empty() => { + trailing_seq.push(ch); + } + _ => { + expression.push(ch); + } + } + } + Err(UnclosedLbrace) + } + + fn parse(mut self) -> Result { + let mut content = String::new(); + let mut values = vec![]; + + while let Some(ch) = self.chars.next() { + match ch { + '{' => { + if let Some('{') = self.chars.peek() { + self.chars.next(); + content.push('{'); + } else { + if !content.is_empty() { + values.push(self.expr(ExprKind::Constant { + value: mem::take(&mut content).into(), + kind: None, + })); + } + + values.extend(self.parse_formatted_value()?); + } + } + '}' => { + if let Some('}') = self.chars.peek() { + self.chars.next(); + content.push('}'); + } else { + return Err(UnopenedRbrace); + } + } + _ => { + content.push(ch); + } + } + } + + if !content.is_empty() { + values.push(self.expr(ExprKind::Constant { + value: content.into(), + kind: None, + })) + } + + let s = match values.len() { + 0 => self.expr(ExprKind::Constant { + value: String::new().into(), + kind: None, + }), + 1 => values.into_iter().next().unwrap(), + _ => self.expr(ExprKind::JoinedStr { values }), + }; + Ok(s) + } +} + +fn parse_fstring_expr(source: &str) -> Result { + let fstring_body = format!("({})", source); + parse_expression(&fstring_body) +} + +/// Parse an fstring from a string, located at a certain position in the sourcecode. +/// In case of errors, we will get the location and the error returned. 
+pub fn parse_located_fstring(source: &str, location: Location) -> Result { + FStringParser::new(source, location) + .parse() + .map_err(|error| FStringError { error, location }) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_fstring(source: &str) -> Result { + FStringParser::new(source, Location::default()).parse() + } + + #[test] + fn test_parse_fstring() { + let source = "{a}{ b }{{foo}}"; + let parse_ast = parse_fstring(&source).unwrap(); + + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_fstring_nested_spec() { + let source = "{foo:{spec}}"; + let parse_ast = parse_fstring(&source).unwrap(); + + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_fstring_not_nested_spec() { + let source = "{foo:spec}"; + let parse_ast = parse_fstring(&source).unwrap(); + + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_empty_fstring() { + insta::assert_debug_snapshot!(parse_fstring("").unwrap()); + } + + #[test] + fn test_fstring_parse_selfdocumenting_base() { + let src = "{user=}"; + let parse_ast = parse_fstring(&src).unwrap(); + + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_fstring_parse_selfdocumenting_base_more() { + let src = "mix {user=} with text and {second=}"; + let parse_ast = parse_fstring(&src).unwrap(); + + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_fstring_parse_selfdocumenting_format() { + let src = "{user=:>10}"; + let parse_ast = parse_fstring(&src).unwrap(); + + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_invalid_fstring() { + assert_eq!(parse_fstring("{5!a"), Err(ExpectedRbrace)); + assert_eq!(parse_fstring("{5!a1}"), Err(ExpectedRbrace)); + assert_eq!(parse_fstring("{5!"), Err(ExpectedRbrace)); + assert_eq!(parse_fstring("abc{!a 'cat'}"), Err(EmptyExpression)); + assert_eq!(parse_fstring("{!a"), Err(EmptyExpression)); + assert_eq!(parse_fstring("{ !a}"), Err(EmptyExpression)); + + 
assert_eq!(parse_fstring("{5!}"), Err(InvalidConversionFlag)); + assert_eq!(parse_fstring("{5!x}"), Err(InvalidConversionFlag)); + + assert_eq!(parse_fstring("{a:{a:{b}}"), Err(ExpressionNestedTooDeeply)); + + assert_eq!(parse_fstring("{a:b}}"), Err(UnopenedRbrace)); + assert_eq!(parse_fstring("}"), Err(UnopenedRbrace)); + assert_eq!(parse_fstring("{a:{b}"), Err(UnclosedLbrace)); + assert_eq!(parse_fstring("{"), Err(UnclosedLbrace)); + + assert_eq!(parse_fstring("{}"), Err(EmptyExpression)); + + // TODO: check for InvalidExpression enum? + assert!(parse_fstring("{class}").is_err()); + } + + #[test] + fn test_parse_fstring_not_equals() { + let source = "{1 != 2}"; + let parse_ast = parse_fstring(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_fstring_equals() { + let source = "{42 == 42}"; + let parse_ast = parse_fstring(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_fstring_selfdoc_prec_space() { + let source = "{x =}"; + let parse_ast = parse_fstring(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_fstring_selfdoc_trailing_space() { + let source = "{x= }"; + let parse_ast = parse_fstring(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_fstring_yield_expr() { + let source = "{yield}"; + let parse_ast = parse_fstring(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } +} diff --git a/nac3parser/src/function.rs b/nac3parser/src/function.rs new file mode 100644 index 00000000..68d890f2 --- /dev/null +++ b/nac3parser/src/function.rs @@ -0,0 +1,96 @@ +use ahash::RandomState; +use std::collections::HashSet; + +use crate::ast; +use crate::error::{LexicalError, LexicalErrorType}; + +pub struct ArgumentList { + pub args: Vec, + pub keywords: Vec, +} + +type ParameterDefs = (Vec, Vec, Vec); +type ParameterDef = (ast::Arg, Option); + +pub fn parse_params( + params: (Vec, Vec), 
+) -> Result { + let mut posonly = Vec::with_capacity(params.0.len()); + let mut names = Vec::with_capacity(params.1.len()); + let mut defaults = vec![]; + + let mut try_default = |name: &ast::Arg, default| { + if let Some(default) = default { + defaults.push(default); + } else if !defaults.is_empty() { + // Once we have started with defaults, all remaining arguments must + // have defaults + return Err(LexicalError { + error: LexicalErrorType::DefaultArgumentError, + location: name.location, + }); + } + Ok(()) + }; + + for (name, default) in params.0 { + try_default(&name, default)?; + posonly.push(name); + } + + for (name, default) in params.1 { + try_default(&name, default)?; + names.push(name); + } + + Ok((posonly, names, defaults)) +} + +type FunctionArgument = (Option<(ast::Location, Option)>, ast::Expr); + +pub fn parse_args(func_args: Vec) -> Result { + let mut args = vec![]; + let mut keywords = vec![]; + + let mut keyword_names = HashSet::with_capacity_and_hasher(func_args.len(), RandomState::new()); + for (name, value) in func_args { + match name { + Some((location, name)) => { + if let Some(keyword_name) = &name { + if keyword_names.contains(keyword_name) { + return Err(LexicalError { + error: LexicalErrorType::DuplicateKeywordArgumentError, + location, + }); + } + + keyword_names.insert(keyword_name.clone()); + } + + keywords.push(ast::Keyword::new( + location, + ast::KeywordData { + arg: name, + value: Box::new(value), + }, + )); + } + None => { + // Allow starred args after keyword arguments. + if !keywords.is_empty() && !is_starred(&value) { + return Err(LexicalError { + error: LexicalErrorType::PositionalArgumentError, + location: value.location, + }); + } + + args.push(value); + } + } + } + Ok(ArgumentList { args, keywords }) +} + +fn is_starred(exp: &ast::Expr) -> bool { + matches!(exp.node, ast::ExprKind::Starred { .. 
}) +} diff --git a/nac3parser/src/lexer.rs b/nac3parser/src/lexer.rs new file mode 100644 index 00000000..dba6d074 --- /dev/null +++ b/nac3parser/src/lexer.rs @@ -0,0 +1,1757 @@ +//! This module takes care of lexing python source text. +//! +//! This means source code is translated into separate tokens. + +pub use super::token::Tok; +use crate::ast::Location; +use crate::error::{LexicalError, LexicalErrorType}; +use num_bigint::BigInt; +use num_traits::identities::Zero; +use num_traits::Num; +use std::char; +use std::cmp::Ordering; +use std::str::FromStr; +use unic_emoji_char::is_emoji_presentation; +use unic_ucd_ident::{is_xid_continue, is_xid_start}; + +#[derive(Clone, Copy, PartialEq, Debug, Default)] +struct IndentationLevel { + tabs: usize, + spaces: usize, +} + +impl IndentationLevel { + fn compare_strict( + &self, + other: &IndentationLevel, + location: Location, + ) -> Result { + // We only know for sure that we're smaller or bigger if tabs + // and spaces both differ in the same direction. Otherwise we're + // dependent on the size of tabs. + match self.tabs.cmp(&other.tabs) { + Ordering::Less => { + if self.spaces <= other.spaces { + Ok(Ordering::Less) + } else { + Err(LexicalError { + location, + error: LexicalErrorType::TabError, + }) + } + } + Ordering::Greater => { + if self.spaces >= other.spaces { + Ok(Ordering::Greater) + } else { + Err(LexicalError { + location, + error: LexicalErrorType::TabError, + }) + } + } + Ordering::Equal => Ok(self.spaces.cmp(&other.spaces)), + } + } +} + +pub struct Lexer> { + chars: T, + at_begin_of_line: bool, + nesting: usize, // Amount of parenthesis + indentation_stack: Vec, + pending: Vec, + chr0: Option, + chr1: Option, + chr2: Option, + location: Location, +} + +pub static KEYWORDS: phf::Map<&'static str, Tok> = phf::phf_map! { + // Alphabetical keywords: + "..." 
=> Tok::Ellipsis, + "False" => Tok::False, + "None" => Tok::None, + "True" => Tok::True, + + "and" => Tok::And, + "as" => Tok::As, + "assert" => Tok::Assert, + "async" => Tok::Async, + "await" => Tok::Await, + "break" => Tok::Break, + "class" => Tok::Class, + "continue" => Tok::Continue, + "def" => Tok::Def, + "del" => Tok::Del, + "elif" => Tok::Elif, + "else" => Tok::Else, + "except" => Tok::Except, + "finally" => Tok::Finally, + "for" => Tok::For, + "from" => Tok::From, + "global" => Tok::Global, + "if" => Tok::If, + "import" => Tok::Import, + "in" => Tok::In, + "is" => Tok::Is, + "lambda" => Tok::Lambda, + "nonlocal" => Tok::Nonlocal, + "not" => Tok::Not, + "or" => Tok::Or, + "pass" => Tok::Pass, + "raise" => Tok::Raise, + "return" => Tok::Return, + "try" => Tok::Try, + "while" => Tok::While, + "with" => Tok::With, + "yield" => Tok::Yield, +}; + +pub type Spanned = (Location, Tok, Location); +pub type LexResult = Result; + +#[inline] +pub fn make_tokenizer(source: &str) -> impl Iterator + '_ { + make_tokenizer_located(source, Location::new(0, 0)) +} + +pub fn make_tokenizer_located( + source: &str, + start_location: Location, +) -> impl Iterator + '_ { + let nlh = NewlineHandler::new(source.chars()); + Lexer::new(nlh, start_location) +} + +// The newline handler is an iterator which collapses different newline +// types into \n always. 
+pub struct NewlineHandler> { + source: T, + chr0: Option, + chr1: Option, +} + +impl NewlineHandler +where + T: Iterator, +{ + pub fn new(source: T) -> Self { + let mut nlh = NewlineHandler { + source, + chr0: None, + chr1: None, + }; + nlh.shift(); + nlh.shift(); + nlh + } + + fn shift(&mut self) -> Option { + let result = self.chr0; + self.chr0 = self.chr1; + self.chr1 = self.source.next(); + result + } +} + +impl Iterator for NewlineHandler +where + T: Iterator, +{ + type Item = char; + + fn next(&mut self) -> Option { + // Collapse \r\n into \n + loop { + if self.chr0 == Some('\r') { + if self.chr1 == Some('\n') { + // Transform windows EOL into \n + self.shift(); + } else { + // Transform MAC EOL into \n + self.chr0 = Some('\n') + } + } else { + break; + } + } + + self.shift() + } +} + +impl Lexer +where + T: Iterator, +{ + pub fn new(input: T, start: Location) -> Self { + let mut lxr = Lexer { + chars: input, + at_begin_of_line: true, + nesting: 0, + indentation_stack: vec![Default::default()], + pending: Vec::new(), + chr0: None, + location: start, + chr1: None, + chr2: None, + }; + lxr.next_char(); + lxr.next_char(); + lxr.next_char(); + // Start at top row (=1) left column (=1) + lxr.location.reset(); + lxr + } + + // Lexer helper functions: + fn lex_identifier(&mut self) -> LexResult { + let mut name = String::new(); + let start_pos = self.get_pos(); + + // Detect potential string like rb'' b'' f'' u'' r'' + let mut saw_b = false; + let mut saw_r = false; + let mut saw_u = false; + let mut saw_f = false; + loop { + // Detect r"", f"", b"" and u"" + if !(saw_b || saw_u || saw_f) && matches!(self.chr0, Some('b') | Some('B')) { + saw_b = true; + } else if !(saw_b || saw_r || saw_u || saw_f) + && matches!(self.chr0, Some('u') | Some('U')) + { + saw_u = true; + } else if !(saw_r || saw_u) && (self.chr0 == Some('r') || self.chr0 == Some('R')) { + saw_r = true; + } else if !(saw_b || saw_u || saw_f) + && (self.chr0 == Some('f') || self.chr0 == Some('F')) + { + 
saw_f = true; + } else { + break; + } + + // Take up char into name: + name.push(self.next_char().unwrap()); + + // Check if we have a string: + if self.chr0 == Some('"') || self.chr0 == Some('\'') { + return self.lex_string(saw_b, saw_r, saw_u, saw_f); + } + } + + while self.is_identifier_continuation() { + name.push(self.next_char().unwrap()); + } + let end_pos = self.get_pos(); + + if let Some(tok) = KEYWORDS.get(name.as_str()) { + Ok((start_pos, tok.clone(), end_pos)) + } else { + Ok((start_pos, Tok::Name { name }, end_pos)) + } + } + + /// Numeric lexing. The feast can start! + fn lex_number(&mut self) -> LexResult { + let start_pos = self.get_pos(); + if self.chr0 == Some('0') { + if self.chr1 == Some('x') || self.chr1 == Some('X') { + // Hex! + self.next_char(); + self.next_char(); + self.lex_number_radix(start_pos, 16) + } else if self.chr1 == Some('o') || self.chr1 == Some('O') { + // Octal style! + self.next_char(); + self.next_char(); + self.lex_number_radix(start_pos, 8) + } else if self.chr1 == Some('b') || self.chr1 == Some('B') { + // Binary! + self.next_char(); + self.next_char(); + self.lex_number_radix(start_pos, 2) + } else { + self.lex_normal_number() + } + } else { + self.lex_normal_number() + } + } + + /// Lex a hex/octal/decimal/binary number without a decimal point. + fn lex_number_radix(&mut self, start_pos: Location, radix: u32) -> LexResult { + let value_text = self.radix_run(radix); + let end_pos = self.get_pos(); + let value = BigInt::from_str_radix(&value_text, radix).map_err(|e| LexicalError { + error: LexicalErrorType::OtherError(format!("{:?}", e)), + location: start_pos, + })?; + Ok((start_pos, Tok::Int { value }, end_pos)) + } + + /// Lex a normal number, that is, no octal, hex or binary number. 
+    ///
+    /// Reads a run of decimal digits and then decides between three token
+    /// kinds:
+    ///
+    /// * `Tok::Float` when a `.` or a well-formed exponent follows,
+    /// * `Tok::Complex` (with `real: 0.0`) when the literal ends in `j`/`J`,
+    /// * `Tok::Int` otherwise.
+    ///
+    /// Errors are returned as `LexicalError` when an underscore directly
+    /// follows the decimal point, when a non-zero integer is written with a
+    /// leading zero, or when the collected text cannot be parsed as a number.
+    fn lex_normal_number(&mut self) -> LexResult {
+        let start_pos = self.get_pos();
+        let start_is_zero = self.chr0 == Some('0');
+        // Normal number:
+        let mut value_text = self.radix_run(10);
+
+        // If float:
+        if self.chr0 == Some('.') || self.at_exponent() {
+            // Take '.':
+            if self.chr0 == Some('.') {
+                if self.chr1 == Some('_') {
+                    return Err(LexicalError {
+                        error: LexicalErrorType::OtherError("Invalid Syntax".to_owned()),
+                        location: self.get_pos(),
+                    });
+                }
+                value_text.push(self.next_char().unwrap());
+                value_text.push_str(&self.radix_run(10));
+            }
+
+            // 1e6 for example.
+            // Only consume the marker when it really starts an exponent
+            // ('e', optional sign, digit). A bare `e` after the fraction, as
+            // in `1.0else`, must stay in the input for the next token;
+            // swallowing it used to build the text "1.0e", which is not a
+            // parsable float.
+            if self.at_exponent() {
+                value_text.push(self.next_char().unwrap().to_ascii_lowercase());
+
+                // Optional +/-
+                if self.chr0 == Some('-') || self.chr0 == Some('+') {
+                    value_text.push(self.next_char().unwrap());
+                }
+
+                value_text.push_str(&self.radix_run(10));
+            }
+
+            // Report unparsable text as a lexical error instead of panicking,
+            // in the same way lex_number_radix does.
+            let value = f64::from_str(&value_text).map_err(|e| LexicalError {
+                error: LexicalErrorType::OtherError(format!("{:?}", e)),
+                location: start_pos,
+            })?;
+            // Parse trailing 'j':
+            if self.chr0 == Some('j') || self.chr0 == Some('J') {
+                self.next_char();
+                let end_pos = self.get_pos();
+                Ok((
+                    start_pos,
+                    Tok::Complex {
+                        real: 0.0,
+                        imag: value,
+                    },
+                    end_pos,
+                ))
+            } else {
+                let end_pos = self.get_pos();
+                Ok((start_pos, Tok::Float { value }, end_pos))
+            }
+        } else {
+            // Parse trailing 'j':
+            if self.chr0 == Some('j') || self.chr0 == Some('J') {
+                self.next_char();
+                let end_pos = self.get_pos();
+                let imag = f64::from_str(&value_text).map_err(|e| LexicalError {
+                    error: LexicalErrorType::OtherError(format!("{:?}", e)),
+                    location: start_pos,
+                })?;
+                Ok((start_pos, Tok::Complex { real: 0.0, imag }, end_pos))
+            } else {
+                let end_pos = self.get_pos();
+                let value = value_text.parse::<BigInt>().map_err(|e| LexicalError {
+                    error: LexicalErrorType::OtherError(format!("{:?}", e)),
+                    location: start_pos,
+                })?;
+                // A leading zero is only allowed when the whole literal is
+                // zero (`0`, `00`), not for values such as `012`.
+                if start_is_zero && !value.is_zero() {
+                    return Err(LexicalError {
+                        error: LexicalErrorType::OtherError("Invalid Token".to_owned()),
+                        location: self.get_pos(),
+                    });
+                }
+                Ok((start_pos, Tok::Int { value }, end_pos))
+            }
+        }
+    }
+
+    /// Consume a sequence of numbers with the given radix,
+    /// the digits can be decorated with underscores
+    /// like this: '1_2_3_4' == '1234'
+    fn radix_run(&mut
self, radix: u32) -> String { + let mut value_text = String::new(); + + loop { + if let Some(c) = self.take_number(radix) { + value_text.push(c); + } else if self.chr0 == Some('_') && Lexer::::is_digit_of_radix(self.chr1, radix) { + self.next_char(); + } else { + break; + } + } + value_text + } + + /// Consume a single character with the given radix. + fn take_number(&mut self, radix: u32) -> Option { + let take_char = Lexer::::is_digit_of_radix(self.chr0, radix); + + if take_char { + Some(self.next_char().unwrap()) + } else { + None + } + } + + /// Test if a digit is of a certain radix. + fn is_digit_of_radix(c: Option, radix: u32) -> bool { + match radix { + 2 => matches!(c, Some('0'..='1')), + 8 => matches!(c, Some('0'..='7')), + 10 => matches!(c, Some('0'..='9')), + 16 => matches!(c, Some('0'..='9') | Some('a'..='f') | Some('A'..='F')), + other => unimplemented!("Radix not implemented: {}", other), + } + } + + /// Test if we face '[eE][-+]?[0-9]+' + fn at_exponent(&self) -> bool { + match self.chr0 { + Some('e') | Some('E') => match self.chr1 { + Some('+') | Some('-') => matches!(self.chr2, Some('0'..='9')), + Some('0'..='9') => true, + _ => false, + }, + _ => false, + } + } + + /// Skip everything until end of line + fn lex_comment(&mut self) { + self.next_char(); + loop { + match self.chr0 { + Some('\n') => return, + Some(_) => {} + None => return, + } + self.next_char(); + } + } + + fn unicode_literal(&mut self, literal_number: usize) -> Result { + let mut p: u32 = 0u32; + let unicode_error = LexicalError { + error: LexicalErrorType::UnicodeError, + location: self.get_pos(), + }; + for i in 1..=literal_number { + match self.next_char() { + Some(c) => match c.to_digit(16) { + Some(d) => p += d << ((literal_number - i) * 4), + None => return Err(unicode_error), + }, + None => return Err(unicode_error), + } + } + match p { + 0xD800..=0xDFFF => Ok(std::char::REPLACEMENT_CHARACTER), + _ => std::char::from_u32(p).ok_or(unicode_error), + } + } + + fn 
parse_octet(&mut self, first: char) -> char { + let mut octet_content = String::new(); + octet_content.push(first); + while octet_content.len() < 3 { + if let Some('0'..='7') = self.chr0 { + octet_content.push(self.next_char().unwrap()) + } else { + break; + } + } + let value = u32::from_str_radix(&octet_content, 8).unwrap(); + char::from_u32(value).unwrap() + } + + fn parse_unicode_name(&mut self) -> Result { + let start_pos = self.get_pos(); + match self.next_char() { + Some('{') => {} + _ => { + return Err(LexicalError { + error: LexicalErrorType::StringError, + location: start_pos, + }) + } + } + let start_pos = self.get_pos(); + let mut name = String::new(); + loop { + match self.next_char() { + Some('}') => break, + Some(c) => name.push(c), + None => { + return Err(LexicalError { + error: LexicalErrorType::StringError, + location: self.get_pos(), + }) + } + } + } + unicode_names2::character(&name).ok_or(LexicalError { + error: LexicalErrorType::UnicodeError, + location: start_pos, + }) + } + + fn lex_string( + &mut self, + is_bytes: bool, + is_raw: bool, + _is_unicode: bool, + is_fstring: bool, + ) -> LexResult { + let quote_char = self.next_char().unwrap(); + let mut string_content = String::new(); + let start_pos = self.get_pos(); + + // If the next two characters are also the quote character, then we have a triple-quoted + // string; consume those two characters and ensure that we require a triple-quote to close + let triple_quoted = if self.chr0 == Some(quote_char) && self.chr1 == Some(quote_char) { + self.next_char(); + self.next_char(); + true + } else { + false + }; + + loop { + match self.next_char() { + Some('\\') => { + if self.chr0 == Some(quote_char) && !is_raw { + string_content.push(quote_char); + self.next_char(); + } else if is_raw { + string_content.push('\\'); + if let Some(c) = self.next_char() { + string_content.push(c) + } else { + return Err(LexicalError { + error: LexicalErrorType::StringError, + location: self.get_pos(), + }); + } + } 
else { + match self.next_char() { + Some('\\') => { + string_content.push('\\'); + } + Some('\'') => string_content.push('\''), + Some('\"') => string_content.push('\"'), + Some('\n') => { + // Ignore Unix EOL character + } + Some('a') => string_content.push('\x07'), + Some('b') => string_content.push('\x08'), + Some('f') => string_content.push('\x0c'), + Some('n') => { + string_content.push('\n'); + } + Some('r') => string_content.push('\r'), + Some('t') => { + string_content.push('\t'); + } + Some('v') => string_content.push('\x0b'), + Some(o @ '0'..='7') => string_content.push(self.parse_octet(o)), + Some('x') => string_content.push(self.unicode_literal(2)?), + Some('u') if !is_bytes => string_content.push(self.unicode_literal(4)?), + Some('U') if !is_bytes => string_content.push(self.unicode_literal(8)?), + Some('N') if !is_bytes => { + string_content.push(self.parse_unicode_name()?) + } + Some(c) => { + string_content.push('\\'); + string_content.push(c); + } + None => { + return Err(LexicalError { + error: LexicalErrorType::StringError, + location: self.get_pos(), + }); + } + } + } + } + Some(c) => { + if c == quote_char { + if triple_quoted { + // Look ahead at the next two characters; if we have two more + // quote_chars, it's the end of the string; consume the remaining + // closing quotes and break the loop + if self.chr0 == Some(quote_char) && self.chr1 == Some(quote_char) { + self.next_char(); + self.next_char(); + break; + } + string_content.push(c); + } else { + break; + } + } else { + if (c == '\n' && !triple_quoted) || (is_bytes && !c.is_ascii()) { + return Err(LexicalError { + error: LexicalErrorType::StringError, + location: self.get_pos(), + }); + } + string_content.push(c); + } + } + None => { + return Err(LexicalError { + error: LexicalErrorType::StringError, + location: self.get_pos(), + }); + } + } + } + let end_pos = self.get_pos(); + + let tok = if is_bytes { + Tok::Bytes { + value: string_content.chars().map(|c| c as u8).collect(), + } + } 
else { + Tok::String { + value: string_content, + is_fstring, + } + }; + + Ok((start_pos, tok, end_pos)) + } + + fn is_identifier_start(&self, c: char) -> bool { + c == '_' || is_xid_start(c) + } + + fn is_identifier_continuation(&self) -> bool { + if let Some(c) = self.chr0 { + match c { + '_' | '0'..='9' => true, + c => is_xid_continue(c), + } + } else { + false + } + } + + /// This is the main entry point. Call this function to retrieve the next token. + /// This function is used by the iterator implementation. + fn inner_next(&mut self) -> LexResult { + // top loop, keep on processing, until we have something pending. + while self.pending.is_empty() { + // Detect indentation levels + if self.at_begin_of_line { + self.handle_indentations()?; + } + + self.consume_normal()?; + } + + Ok(self.pending.remove(0)) + } + + /// Given we are at the start of a line, count the number of spaces and/or tabs until the first character. + fn eat_indentation(&mut self) -> Result { + // Determine indentation: + let mut spaces: usize = 0; + let mut tabs: usize = 0; + loop { + match self.chr0 { + Some(' ') => { + /* + if tabs != 0 { + // Don't allow spaces after tabs as part of indentation. + // This is technically stricter than python3 but spaces after + // tabs is even more insane than mixing spaces and tabs. + return Some(Err(LexicalError { + error: LexicalErrorType::OtherError("Spaces not allowed as part of indentation after tabs".to_owned()), + location: self.get_pos(), + })); + } + */ + self.next_char(); + spaces += 1; + } + Some('\t') => { + if spaces != 0 { + // Don't allow tabs after spaces as part of indentation. + // This is technically stricter than python3 but spaces before + // tabs is even more insane than mixing spaces and tabs. 
+ return Err(LexicalError { + error: LexicalErrorType::TabsAfterSpaces, + location: self.get_pos(), + }); + } + self.next_char(); + tabs += 1; + } + Some('#') => { + self.lex_comment(); + spaces = 0; + tabs = 0; + } + Some('\x0C') => { + // Form feed character! + // Reset indentation for the Emacs user. + self.next_char(); + spaces = 0; + tabs = 0; + } + Some('\n') => { + // Empty line! + self.next_char(); + spaces = 0; + tabs = 0; + } + None => { + spaces = 0; + tabs = 0; + break; + } + _ => { + self.at_begin_of_line = false; + break; + } + } + } + + Ok(IndentationLevel { tabs, spaces }) + } + + fn handle_indentations(&mut self) -> Result<(), LexicalError> { + let indentation_level = self.eat_indentation()?; + + if self.nesting == 0 { + // Determine indent or dedent: + let current_indentation = self.indentation_stack.last().unwrap(); + let ordering = indentation_level.compare_strict(current_indentation, self.get_pos())?; + match ordering { + Ordering::Equal => { + // Same same + } + Ordering::Greater => { + // New indentation level: + self.indentation_stack.push(indentation_level); + let tok_pos = self.get_pos(); + self.emit((tok_pos, Tok::Indent, tok_pos)); + } + Ordering::Less => { + // One or more dedentations + // Pop off other levels until col is found: + + loop { + let current_indentation = self.indentation_stack.last().unwrap(); + let ordering = indentation_level + .compare_strict(current_indentation, self.get_pos())?; + match ordering { + Ordering::Less => { + self.indentation_stack.pop(); + let tok_pos = self.get_pos(); + self.emit((tok_pos, Tok::Dedent, tok_pos)); + } + Ordering::Equal => { + // We arrived at proper level of indentation. + break; + } + Ordering::Greater => { + return Err(LexicalError { + error: LexicalErrorType::IndentationError, + location: self.get_pos(), + }); + } + } + } + } + } + } + + Ok(()) + } + + /// Take a look at the next character, if any, and decide upon the next steps. 
+ fn consume_normal(&mut self) -> Result<(), LexicalError> { + // Check if we have some character: + if let Some(c) = self.chr0 { + // First check identifier: + if self.is_identifier_start(c) { + let identifier = self.lex_identifier()?; + self.emit(identifier); + } else if is_emoji_presentation(c) { + let tok_start = self.get_pos(); + self.next_char(); + let tok_end = self.get_pos(); + self.emit(( + tok_start, + Tok::Name { + name: c.to_string(), + }, + tok_end, + )); + } else { + self.consume_character(c)?; + } + } else { + // We reached end of file. + let tok_pos = self.get_pos(); + + // First of all, we need all nestings to be finished. + if self.nesting > 0 { + return Err(LexicalError { + error: LexicalErrorType::NestingError, + location: tok_pos, + }); + } + + // Next, insert a trailing newline, if required. + if !self.at_begin_of_line { + self.at_begin_of_line = true; + self.emit((tok_pos, Tok::Newline, tok_pos)); + } + + // Next, flush the indentation stack to zero. + while self.indentation_stack.len() > 1 { + self.indentation_stack.pop(); + self.emit((tok_pos, Tok::Dedent, tok_pos)); + } + + self.emit((tok_pos, Tok::EndOfFile, tok_pos)); + } + + Ok(()) + } + + /// Okay, we are facing a weird character, what is it? Determine that. 
+ fn consume_character(&mut self, c: char) -> Result<(), LexicalError> { + match c { + '0'..='9' => { + let number = self.lex_number()?; + self.emit(number); + } + '#' => { + self.lex_comment(); + } + '"' | '\'' => { + let string = self.lex_string(false, false, false, false)?; + self.emit(string); + } + '=' => { + let tok_start = self.get_pos(); + self.next_char(); + match self.chr0 { + Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::EqEqual, tok_end)); + } + _ => { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Equal, tok_end)); + } + } + } + '+' => { + let tok_start = self.get_pos(); + self.next_char(); + if let Some('=') = self.chr0 { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::PlusEqual, tok_end)); + } else { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Plus, tok_end)); + } + } + '*' => { + let tok_start = self.get_pos(); + self.next_char(); + match self.chr0 { + Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::StarEqual, tok_end)); + } + Some('*') => { + self.next_char(); + match self.chr0 { + Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::DoubleStarEqual, tok_end)); + } + _ => { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::DoubleStar, tok_end)); + } + } + } + _ => { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Star, tok_end)); + } + } + } + '/' => { + let tok_start = self.get_pos(); + self.next_char(); + match self.chr0 { + Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::SlashEqual, tok_end)); + } + Some('/') => { + self.next_char(); + match self.chr0 { + Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::DoubleSlashEqual, tok_end)); + } + _ => { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::DoubleSlash, tok_end)); 
+ } + } + } + _ => { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Slash, tok_end)); + } + } + } + '%' => { + let tok_start = self.get_pos(); + self.next_char(); + if let Some('=') = self.chr0 { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::PercentEqual, tok_end)); + } else { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Percent, tok_end)); + } + } + '|' => { + let tok_start = self.get_pos(); + self.next_char(); + if let Some('=') = self.chr0 { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::VbarEqual, tok_end)); + } else { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Vbar, tok_end)); + } + } + '^' => { + let tok_start = self.get_pos(); + self.next_char(); + if let Some('=') = self.chr0 { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::CircumflexEqual, tok_end)); + } else { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::CircumFlex, tok_end)); + } + } + '&' => { + let tok_start = self.get_pos(); + self.next_char(); + if let Some('=') = self.chr0 { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::AmperEqual, tok_end)); + } else { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Amper, tok_end)); + } + } + '-' => { + let tok_start = self.get_pos(); + self.next_char(); + match self.chr0 { + Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::MinusEqual, tok_end)); + } + Some('>') => { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Rarrow, tok_end)); + } + _ => { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Minus, tok_end)); + } + } + } + '@' => { + let tok_start = self.get_pos(); + self.next_char(); + if let Some('=') = self.chr0 { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::AtEqual, tok_end)); + } else { + let tok_end = self.get_pos(); + 
self.emit((tok_start, Tok::At, tok_end)); + } + } + '!' => { + let tok_start = self.get_pos(); + self.next_char(); + if let Some('=') = self.chr0 { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::NotEqual, tok_end)); + } else { + return Err(LexicalError { + error: LexicalErrorType::UnrecognizedToken { tok: '!' }, + location: tok_start, + }); + } + } + '~' => { + self.eat_single_char(Tok::Tilde); + } + '(' => { + self.eat_single_char(Tok::Lpar); + self.nesting += 1; + } + ')' => { + self.eat_single_char(Tok::Rpar); + if self.nesting == 0 { + return Err(LexicalError { + error: LexicalErrorType::NestingError, + location: self.get_pos(), + }); + } + self.nesting -= 1; + } + '[' => { + self.eat_single_char(Tok::Lsqb); + self.nesting += 1; + } + ']' => { + self.eat_single_char(Tok::Rsqb); + if self.nesting == 0 { + return Err(LexicalError { + error: LexicalErrorType::NestingError, + location: self.get_pos(), + }); + } + self.nesting -= 1; + } + '{' => { + self.eat_single_char(Tok::Lbrace); + self.nesting += 1; + } + '}' => { + self.eat_single_char(Tok::Rbrace); + if self.nesting == 0 { + return Err(LexicalError { + error: LexicalErrorType::NestingError, + location: self.get_pos(), + }); + } + self.nesting -= 1; + } + ':' => { + let tok_start = self.get_pos(); + self.next_char(); + if let Some('=') = self.chr0 { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::ColonEqual, tok_end)); + } else { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Colon, tok_end)); + } + } + ';' => { + self.eat_single_char(Tok::Semi); + } + '<' => { + let tok_start = self.get_pos(); + self.next_char(); + match self.chr0 { + Some('<') => { + self.next_char(); + match self.chr0 { + Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::LeftShiftEqual, tok_end)); + } + _ => { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::LeftShift, tok_end)); + } + } + } + Some('=') 
=> { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::LessEqual, tok_end)); + } + _ => { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Less, tok_end)); + } + } + } + '>' => { + let tok_start = self.get_pos(); + self.next_char(); + match self.chr0 { + Some('>') => { + self.next_char(); + match self.chr0 { + Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::RightShiftEqual, tok_end)); + } + _ => { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::RightShift, tok_end)); + } + } + } + Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::GreaterEqual, tok_end)); + } + _ => { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Greater, tok_end)); + } + } + } + ',' => { + let tok_start = self.get_pos(); + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Comma, tok_end)); + } + '.' => { + if let Some('0'..='9') = self.chr1 { + let number = self.lex_number()?; + self.emit(number); + } else { + let tok_start = self.get_pos(); + self.next_char(); + if let (Some('.'), Some('.')) = (&self.chr0, &self.chr1) { + self.next_char(); + self.next_char(); + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Ellipsis, tok_end)); + } else { + let tok_end = self.get_pos(); + self.emit((tok_start, Tok::Dot, tok_end)); + } + } + } + '\n' => { + let tok_start = self.get_pos(); + self.next_char(); + let tok_end = self.get_pos(); + + // Depending on the nesting level, we emit newline or not: + if self.nesting == 0 { + self.at_begin_of_line = true; + self.emit((tok_start, Tok::Newline, tok_end)); + } + } + ' ' | '\t' | '\x0C' => { + // Skip whitespaces + self.next_char(); + while self.chr0 == Some(' ') || self.chr0 == Some('\t') || self.chr0 == Some('\x0C') + { + self.next_char(); + } + } + '\\' => { + self.next_char(); + if let Some('\n') = self.chr0 { + self.next_char(); + } else { + return 
Err(LexicalError { + error: LexicalErrorType::LineContinuationError, + location: self.get_pos(), + }); + } + + if self.chr0.is_none() { + return Err(LexicalError { + error: LexicalErrorType::Eof, + location: self.get_pos(), + }); + } + } + + _ => { + let c = self.next_char(); + return Err(LexicalError { + error: LexicalErrorType::UnrecognizedToken { tok: c.unwrap() }, + location: self.get_pos(), + }); + } // Ignore all the rest.. + } + + Ok(()) + } + + fn eat_single_char(&mut self, ty: Tok) { + let tok_start = self.get_pos(); + self.next_char().unwrap(); + let tok_end = self.get_pos(); + self.emit((tok_start, ty, tok_end)); + } + + /// Helper function to go to the next character coming up. + fn next_char(&mut self) -> Option { + let c = self.chr0; + let nxt = self.chars.next(); + self.chr0 = self.chr1; + self.chr1 = self.chr2; + self.chr2 = nxt; + if c == Some('\n') { + self.location.newline(); + } else { + self.location.go_right(); + } + c + } + + /// Helper function to retrieve the current position. + fn get_pos(&self) -> Location { + self.location + } + + /// Helper function to emit a lexed token to the queue of tokens. + fn emit(&mut self, spanned: Spanned) { + self.pending.push(spanned); + } +} + +/* Implement iterator pattern for the get_tok function. + +Calling the next element in the iterator will yield the next lexical +token. 
+*/ +impl Iterator for Lexer +where + T: Iterator, +{ + type Item = LexResult; + + fn next(&mut self) -> Option { + // Idea: create some sort of hash map for single char tokens: + // let mut X = HashMap::new(); + // X.insert('=', Tok::Equal); + let token = self.inner_next(); + trace!( + "Lex token {:?}, nesting={:?}, indent stack: {:?}", + token, + self.nesting, + self.indentation_stack + ); + + match token { + Ok((_, Tok::EndOfFile, _)) => None, + r => Some(r), + } + } +} + +#[cfg(test)] +mod tests { + use super::{make_tokenizer, NewlineHandler, Tok}; + use num_bigint::BigInt; + + const WINDOWS_EOL: &str = "\r\n"; + const MAC_EOL: &str = "\r"; + const UNIX_EOL: &str = "\n"; + + pub fn lex_source(source: &str) -> Vec { + let lexer = make_tokenizer(source); + lexer.map(|x| x.unwrap().1).collect() + } + + #[test] + fn test_newline_processor() { + // Escape \ followed by \n (by removal): + let src = "b\\\r\n"; + assert_eq!(4, src.len()); + let nlh = NewlineHandler::new(src.chars()); + let x: Vec = nlh.collect(); + assert_eq!(vec!['b', '\\', '\n'], x); + } + + #[test] + fn test_raw_string() { + let source = "r\"\\\\\" \"\\\\\""; + let tokens = lex_source(source); + assert_eq!( + tokens, + vec![ + Tok::String { + value: "\\\\".to_owned(), + is_fstring: false, + }, + Tok::String { + value: "\\".to_owned(), + is_fstring: false, + }, + Tok::Newline, + ] + ); + } + + #[test] + fn test_numbers() { + let source = "0x2f 0b1101 0 123 0.2 2j 2.2j"; + let tokens = lex_source(source); + assert_eq!( + tokens, + vec![ + Tok::Int { + value: BigInt::from(47), + }, + Tok::Int { + value: BigInt::from(13), + }, + Tok::Int { + value: BigInt::from(0), + }, + Tok::Int { + value: BigInt::from(123), + }, + Tok::Float { value: 0.2 }, + Tok::Complex { + real: 0.0, + imag: 2.0, + }, + Tok::Complex { + real: 0.0, + imag: 2.2, + }, + Tok::Newline, + ] + ); + } + + macro_rules! 
test_line_comment { + ($($name:ident: $eol:expr,)*) => { + $( + #[test] + fn $name() { + let source = format!(r"99232 # {}", $eol); + let tokens = lex_source(&source); + assert_eq!(tokens, vec![Tok::Int { value: BigInt::from(99232) }, Tok::Newline]); + } + )* + } + } + + test_line_comment! { + test_line_comment_long: " foo", + test_line_comment_whitespace: " ", + test_line_comment_single_whitespace: " ", + test_line_comment_empty: "", + } + + macro_rules! test_comment_until_eol { + ($($name:ident: $eol:expr,)*) => { + $( + #[test] + fn $name() { + let source = format!("123 # Foo{}456", $eol); + let tokens = lex_source(&source); + assert_eq!( + tokens, + vec![ + Tok::Int { value: BigInt::from(123) }, + Tok::Newline, + Tok::Int { value: BigInt::from(456) }, + Tok::Newline, + ] + ) + } + )* + } + } + + test_comment_until_eol! { + test_comment_until_windows_eol: WINDOWS_EOL, + test_comment_until_mac_eol: MAC_EOL, + test_comment_until_unix_eol: UNIX_EOL, + } + + #[test] + fn test_assignment() { + let source = r"avariable = 99 + 2-0"; + let tokens = lex_source(source); + assert_eq!( + tokens, + vec![ + Tok::Name { + name: String::from("avariable"), + }, + Tok::Equal, + Tok::Int { + value: BigInt::from(99) + }, + Tok::Plus, + Tok::Int { + value: BigInt::from(2) + }, + Tok::Minus, + Tok::Int { + value: BigInt::from(0) + }, + Tok::Newline, + ] + ); + } + + macro_rules! test_indentation_with_eol { + ($($name:ident: $eol:expr,)*) => { + $( + #[test] + fn $name() { + let source = format!("def foo():{} return 99{}{}", $eol, $eol, $eol); + let tokens = lex_source(&source); + assert_eq!( + tokens, + vec![ + Tok::Def, + Tok::Name { + name: String::from("foo"), + }, + Tok::Lpar, + Tok::Rpar, + Tok::Colon, + Tok::Newline, + Tok::Indent, + Tok::Return, + Tok::Int { value: BigInt::from(99) }, + Tok::Newline, + Tok::Dedent, + ] + ); + } + )* + }; + } + + test_indentation_with_eol! 
{ + test_indentation_windows_eol: WINDOWS_EOL, + test_indentation_mac_eol: MAC_EOL, + test_indentation_unix_eol: UNIX_EOL, + } + + macro_rules! test_double_dedent_with_eol { + ($($name:ident: $eol:expr,)*) => { + $( + #[test] + fn $name() { + let source = format!("def foo():{} if x:{}{} return 99{}{}", $eol, $eol, $eol, $eol, $eol); + let tokens = lex_source(&source); + assert_eq!( + tokens, + vec![ + Tok::Def, + Tok::Name { + name: String::from("foo"), + }, + Tok::Lpar, + Tok::Rpar, + Tok::Colon, + Tok::Newline, + Tok::Indent, + Tok::If, + Tok::Name { + name: String::from("x"), + }, + Tok::Colon, + Tok::Newline, + Tok::Indent, + Tok::Return, + Tok::Int { value: BigInt::from(99) }, + Tok::Newline, + Tok::Dedent, + Tok::Dedent, + ] + ); + } + )* + } + } + + macro_rules! test_double_dedent_with_tabs { + ($($name:ident: $eol:expr,)*) => { + $( + #[test] + fn $name() { + let source = format!("def foo():{}\tif x:{}{}\t return 99{}{}", $eol, $eol, $eol, $eol, $eol); + let tokens = lex_source(&source); + assert_eq!( + tokens, + vec![ + Tok::Def, + Tok::Name { + name: String::from("foo"), + }, + Tok::Lpar, + Tok::Rpar, + Tok::Colon, + Tok::Newline, + Tok::Indent, + Tok::If, + Tok::Name { + name: String::from("x"), + }, + Tok::Colon, + Tok::Newline, + Tok::Indent, + Tok::Return, + Tok::Int { value: BigInt::from(99) }, + Tok::Newline, + Tok::Dedent, + Tok::Dedent, + ] + ); + } + )* + } + } + + test_double_dedent_with_eol! { + test_double_dedent_windows_eol: WINDOWS_EOL, + test_double_dedent_mac_eol: MAC_EOL, + test_double_dedent_unix_eol: UNIX_EOL, + } + + test_double_dedent_with_tabs! { + test_double_dedent_tabs_windows_eol: WINDOWS_EOL, + test_double_dedent_tabs_mac_eol: MAC_EOL, + test_double_dedent_tabs_unix_eol: UNIX_EOL, + } + + macro_rules! 
test_newline_in_brackets { + ($($name:ident: $eol:expr,)*) => { + $( + #[test] + fn $name() { + let source = format!("x = [{} 1,2{}]{}", $eol, $eol, $eol); + let tokens = lex_source(&source); + assert_eq!( + tokens, + vec![ + Tok::Name { + name: String::from("x"), + }, + Tok::Equal, + Tok::Lsqb, + Tok::Int { value: BigInt::from(1) }, + Tok::Comma, + Tok::Int { value: BigInt::from(2) }, + Tok::Rsqb, + Tok::Newline, + ] + ); + } + )* + }; + } + + test_newline_in_brackets! { + test_newline_in_brackets_windows_eol: WINDOWS_EOL, + test_newline_in_brackets_mac_eol: MAC_EOL, + test_newline_in_brackets_unix_eol: UNIX_EOL, + } + + #[test] + fn test_operators() { + let source = "//////=/ /"; + let tokens = lex_source(source); + assert_eq!( + tokens, + vec![ + Tok::DoubleSlash, + Tok::DoubleSlash, + Tok::DoubleSlashEqual, + Tok::Slash, + Tok::Slash, + Tok::Newline, + ] + ); + } + + #[test] + fn test_string() { + let source = r#""double" 'single' 'can\'t' "\\\"" '\t\r\n' '\g' r'raw\'' '\420' '\200\0a'"#; + let tokens = lex_source(source); + assert_eq!( + tokens, + vec![ + Tok::String { + value: String::from("double"), + is_fstring: false, + }, + Tok::String { + value: String::from("single"), + is_fstring: false, + }, + Tok::String { + value: String::from("can't"), + is_fstring: false, + }, + Tok::String { + value: String::from("\\\""), + is_fstring: false, + }, + Tok::String { + value: String::from("\t\r\n"), + is_fstring: false, + }, + Tok::String { + value: String::from("\\g"), + is_fstring: false, + }, + Tok::String { + value: String::from("raw\\'"), + is_fstring: false, + }, + Tok::String { + value: String::from("Đ"), + is_fstring: false, + }, + Tok::String { + value: String::from("\u{80}\u{0}a"), + is_fstring: false, + }, + Tok::Newline, + ] + ); + } + + macro_rules! 
test_string_continuation { + ($($name:ident: $eol:expr,)*) => { + $( + #[test] + fn $name() { + let source = format!("\"abc\\{}def\"", $eol); + let tokens = lex_source(&source); + assert_eq!( + tokens, + vec![ + Tok::String { + value: String::from("abcdef"), + is_fstring: false, + }, + Tok::Newline, + ] + ) + } + )* + } + } + + test_string_continuation! { + test_string_continuation_windows_eol: WINDOWS_EOL, + test_string_continuation_mac_eol: MAC_EOL, + test_string_continuation_unix_eol: UNIX_EOL, + } + + #[test] + fn test_single_quoted_byte() { + // single quote + let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##; + let tokens = lex_source(source); + let res = (0..=255).collect::>(); + assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]); + } + + #[test] + fn test_double_quoted_byte() { + // double quote + let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##; + let tokens = lex_source(source); + let res = (0..=255).collect::>(); + assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]); + } + + #[test] + fn test_escape_char_in_byte_literal() { + // backslash does not escape + let source = r##"b"omkmok\Xaa""##; + let tokens = lex_source(source); + let res = vec![111, 109, 107, 109, 111, 107, 92, 88, 97, 97]; + assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]); + } + + #[test] + fn test_raw_byte_literal() { + let source = r"rb'\x1z'"; + let tokens = lex_source(source); + assert_eq!( + tokens, + vec![ + Tok::Bytes { + value: b"\\x1z".to_vec() + }, + Tok::Newline + ] + ); + let source = r"rb'\\'"; + let tokens = lex_source(source); + assert_eq!( + tokens, + vec![ + Tok::Bytes { + value: b"\\\\".to_vec() + }, + Tok::Newline + ] + ) + } + + #[test] + fn test_escape_octet() { + let source = r##"b'\43a\4\1234'"##; + let tokens = lex_source(source); + assert_eq!( + tokens, + vec![ + Tok::Bytes { + value: b"#a\x04S4".to_vec() + }, + Tok::Newline + ] + ) + } + + #[test] + fn test_escape_unicode_name() { + let source = r#""\N{EN SPACE}""#; + let tokens = lex_source(source); + assert_eq!( + tokens, + vec![ + Tok::String { + value: "\u{2002}".to_owned(), + is_fstring: false, + }, + Tok::Newline + ] + ) + } +} diff --git a/nac3parser/src/lib.rs b/nac3parser/src/lib.rs new file mode 100644 index 
00000000..80030e61 --- /dev/null +++ b/nac3parser/src/lib.rs @@ -0,0 +1,37 @@ +//! This crate can be used to parse python sourcecode into a so +//! called AST (abstract syntax tree). +//! +//! The stages involved in this process are lexical analysis and +//! parsing. The lexical analysis splits the sourcecode into +//! tokens, and the parsing transforms those tokens into an AST. +//! +//! For example, one could do this: +//! +//! ``` +//! use rustpython_parser::{parser, ast}; +//! +//! let python_source = "print('Hello world')"; +//! let python_ast = parser::parse_expression(python_source).unwrap(); +//! +//! ``` + +#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/master/logo.png")] +#![doc(html_root_url = "https://docs.rs/rustpython-parser/")] + +#[macro_use] +extern crate log; +use lalrpop_util::lalrpop_mod; +pub use rustpython_ast as ast; + +pub mod error; +mod fstring; +mod function; +pub mod lexer; +pub mod mode; +pub mod parser; +lalrpop_mod!( + #[allow(clippy::all)] + #[allow(unused)] + python +); +pub mod token; diff --git a/nac3parser/src/mode.rs b/nac3parser/src/mode.rs new file mode 100644 index 00000000..c03a538e --- /dev/null +++ b/nac3parser/src/mode.rs @@ -0,0 +1,40 @@ +use crate::token::Tok; + +#[derive(Clone, Copy)] +pub enum Mode { + Module, + Interactive, + Expression, +} + +impl Mode { + pub(crate) fn to_marker(self) -> Tok { + match self { + Self::Module => Tok::StartModule, + Self::Interactive => Tok::StartInteractive, + Self::Expression => Tok::StartExpression, + } + } +} + +impl std::str::FromStr for Mode { + type Err = ModeParseError; + fn from_str(s: &str) -> Result { + match s { + "exec" | "single" => Ok(Mode::Module), + "eval" => Ok(Mode::Expression), + _ => Err(ModeParseError { _priv: () }), + } + } +} + +#[derive(Debug)] +pub struct ModeParseError { + _priv: (), +} + +impl std::fmt::Display for ModeParseError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, r#"mode should 
be "exec", "eval", or "single""#) + } +} diff --git a/nac3parser/src/parser.rs b/nac3parser/src/parser.rs new file mode 100644 index 00000000..02845f77 --- /dev/null +++ b/nac3parser/src/parser.rs @@ -0,0 +1,167 @@ +//! Python parsing. +//! +//! Use this module to parse python code into an AST. +//! There are three ways to parse python code. You could +//! parse a whole program, a single statement, or a single +//! expression. + +use std::iter; + +use crate::ast; +use crate::error::ParseError; +use crate::lexer; +pub use crate::mode::Mode; +use crate::python; + +/* + * Parse python code. + * Grammar may be inspired by antlr grammar for python: + * https://github.com/antlr/grammars-v4/tree/master/python3 + */ + +/// Parse a full python program, containing usually multiple lines. +pub fn parse_program(source: &str) -> Result { + parse(source, Mode::Module).map(|top| match top { + ast::Mod::Module { body, .. } => body, + _ => unreachable!(), + }) +} + +/// Parses a python expression +/// +/// # Example +/// ``` +/// extern crate num_bigint; +/// use rustpython_parser::{parser, ast}; +/// let expr = parser::parse_expression("1 + 2").unwrap(); +/// +/// assert_eq!( +/// expr, +/// ast::Expr { +/// location: ast::Location::new(1, 3), +/// custom: (), +/// node: ast::ExprKind::BinOp { +/// left: Box::new(ast::Expr { +/// location: ast::Location::new(1, 1), +/// custom: (), +/// node: ast::ExprKind::Constant { +/// value: ast::Constant::Int(1.into()), +/// kind: None, +/// } +/// }), +/// op: ast::Operator::Add, +/// right: Box::new(ast::Expr { +/// location: ast::Location::new(1, 5), +/// custom: (), +/// node: ast::ExprKind::Constant { +/// value: ast::Constant::Int(2.into()), +/// kind: None, +/// } +/// }) +/// } +/// }, +/// ); +/// +/// ``` +pub fn parse_expression(source: &str) -> Result { + parse(source, Mode::Expression).map(|top| match top { + ast::Mod::Expression { body } => *body, + _ => unreachable!(), + }) +} + +// Parse a given source code +pub fn 
parse(source: &str, mode: Mode) -> Result { + let lxr = lexer::make_tokenizer(source); + let marker_token = (Default::default(), mode.to_marker(), Default::default()); + let tokenizer = iter::once(Ok(marker_token)).chain(lxr); + + python::TopParser::new() + .parse(tokenizer) + .map_err(ParseError::from) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_empty() { + let parse_ast = parse_program("").unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_print_hello() { + let source = String::from("print('Hello world')"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_print_2() { + let source = String::from("print('Hello world', 2)"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_kwargs() { + let source = String::from("my_func('positional', keyword=2)"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_if_elif_else() { + let source = String::from("if 1: 10\nelif 2: 20\nelse: 30"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_lambda() { + let source = "lambda x, y: x * y"; // lambda(x, y): x * y"; + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_tuples() { + let source = "a, b = 4, 5"; + + insta::assert_debug_snapshot!(parse_program(&source).unwrap()); + } + + #[test] + fn test_parse_class() { + let source = "\ +class Foo(A, B): + def __init__(self): + pass + def method_with_default(self, arg='default'): + pass"; + insta::assert_debug_snapshot!(parse_program(&source).unwrap()); + } + + #[test] + fn test_parse_dict_comprehension() { + let source = String::from("{x1: x2 for y in z}"); + let parse_ast = 
parse_expression(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_list_comprehension() { + let source = String::from("[x for y in z]"); + let parse_ast = parse_expression(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_double_list_comprehension() { + let source = String::from("[x for y, y2 in z for a in b if a < 5 if a > 10]"); + let parse_ast = parse_expression(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } +} diff --git a/nac3parser/src/python.lalrpop b/nac3parser/src/python.lalrpop new file mode 100644 index 00000000..c76f2690 --- /dev/null +++ b/nac3parser/src/python.lalrpop @@ -0,0 +1,1306 @@ +// See also: file:///usr/share/doc/python/html/reference/grammar.html?highlight=grammar +// See also: https://github.com/antlr/grammars-v4/blob/master/python3/Python3.g4 +// See also: file:///usr/share/doc/python/html/reference/compound_stmts.html#function-definitions +// See also: https://greentreesnakes.readthedocs.io/en/latest/nodes.html#keyword + +use std::iter::FromIterator; + +use crate::ast; +use crate::fstring::parse_located_fstring; +use crate::function::{ArgumentList, parse_args, parse_params}; +use crate::error::LexicalError; +use crate::lexer; + +use num_bigint::BigInt; + +grammar; + +// This is a hack to reduce the amount of lalrpop tables generated: +// For each public entry point, a full parse table is generated. +// By having only a single pub function, we reduce this to one. 
+pub Top: ast::Mod = { + StartModule => ast::Mod::Module { body, type_ignores: vec![] }, + StartInteractive => ast::Mod::Interactive { body }, + StartExpression ("\n")* => ast::Mod::Expression { body: Box::new(body) }, +}; + +Program: ast::Suite = { + => { + lines.into_iter().flatten().collect() + }, +}; + +// A file line either has a declaration, or an empty newline: +FileLine: ast::Suite = { + Statement, + "\n" => vec![], +}; + +Suite: ast::Suite = { + SimpleStatement, + "\n" Indent Dedent => s.into_iter().flatten().collect(), +}; + +Statement: ast::Suite = { + SimpleStatement, + => vec![s], +}; + +SimpleStatement: ast::Suite = { + ";"? "\n" => { + let mut statements = vec![s1]; + statements.extend(s2.into_iter().map(|e| e.1)); + statements + } +}; + +SmallStatement: ast::Stmt = { + ExpressionStatement, + PassStatement, + DelStatement, + FlowStatement, + ImportStatement, + GlobalStatement, + NonlocalStatement, + AssertStatement, +}; + +PassStatement: ast::Stmt = { + "pass" => { + ast::Stmt { + location, + custom: (), + node: ast::StmtKind::Pass, + } + }, +}; + +DelStatement: ast::Stmt = { + "del" => { + ast::Stmt { + location, + custom: (), + node: ast::StmtKind::Delete { targets }, + } + }, +}; + +ExpressionStatement: ast::Stmt = { + => { + // Just an expression, no assignment: + if suffix.is_empty() { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Expr { value: Box::new(expression) } + } + } else { + let mut targets = vec![expression]; + let mut values = suffix; + + while values.len() > 1 { + targets.push(values.remove(0)); + } + + let value = Box::new(values.into_iter().next().unwrap()); + + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Assign { targets, value, type_comment: None }, + } + } + }, + => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::AugAssign { + target: Box::new(target), + op, + value: Box::new(rhs) + }, + } + }, + ":" => { + let simple = matches!(target.node, ast::ExprKind::Name { .. 
}); + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::AnnAssign { + target: Box::new(target), + annotation: Box::new(annotation), + value: rhs.map(Box::new), + simple, + }, + } + }, +}; + +AssignSuffix: ast::Expr = { + "=" => e +}; + +TestListOrYieldExpr: ast::Expr = { + TestList, + YieldExpr +} + +#[inline] +TestOrStarExprList: ast::Expr = { + // as far as I can tell, these were the same + TestList +}; + +TestOrStarNamedExprList: ast::Expr = { + GenericList +}; + +TestOrStarExpr: ast::Expr = { + Test, + StarExpr, +}; + +TestOrStarNamedExpr: ast::Expr = { + NamedExpressionTest, + StarExpr, +}; + +AugAssign: ast::Operator = { + "+=" => ast::Operator::Add, + "-=" => ast::Operator::Sub, + "*=" => ast::Operator::Mult, + "@=" => ast::Operator::MatMult, + "/=" => ast::Operator::Div, + "%=" => ast::Operator::Mod, + "&=" => ast::Operator::BitAnd, + "|=" => ast::Operator::BitOr, + "^=" => ast::Operator::BitXor, + "<<=" => ast::Operator::LShift, + ">>=" => ast::Operator::RShift, + "**=" => ast::Operator::Pow, + "//=" => ast::Operator::FloorDiv, +}; + +FlowStatement: ast::Stmt = { + "break" => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Break, + } + }, + "continue" => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Continue, + } + }, + "return" => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Return { value: value.map(Box::new) }, + } + }, + => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Expr { value: Box::new(expression) }, + } + }, + RaiseStatement, +}; + +RaiseStatement: ast::Stmt = { + "raise" => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Raise { exc: None, cause: None }, + } + }, + "raise" => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Raise { exc: Some(Box::new(t)), cause: c.map(|x| Box::new(x.1)) }, + } + }, +}; + +ImportStatement: ast::Stmt = { + "import" >> => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Import { 
names }, + } + }, + "from" "import" => { + let (level, module) = source; + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::ImportFrom { + level, + module, + names + }, + } + }, +}; + +ImportFromLocation: (usize, Option) = { + => { + (dots.iter().sum(), Some(name)) + }, + => { + (dots.iter().sum(), None) + }, +}; + +ImportDots: usize = { + "..." => 3, + "." => 1, +}; + +ImportAsNames: Vec = { + >> => i, + "(" >> ","? ")" => i, + "*" => { + // Star import all + vec![ast::Alias { name: "*".to_string(), asname: None }] + }, +}; + + +#[inline] +ImportAsAlias: ast::Alias = { + => ast::Alias { name, asname: a.map(|a| a.1) }, +}; + +// A name like abc or abc.def.ghi +DottedName: String = { + => n, + => { + let mut r = n.to_string(); + for x in n2 { + r.push_str("."); + r.push_str(&x.1); + } + r + }, +}; + +GlobalStatement: ast::Stmt = { + "global" > => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Global { names } + } + }, +}; + +NonlocalStatement: ast::Stmt = { + "nonlocal" > => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Nonlocal { names } + } + }, +}; + +AssertStatement: ast::Stmt = { + "assert" => { + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Assert { + test: Box::new(test), + msg: msg.map(|e| Box::new(e.1)) + } + } + }, +}; + +CompoundStatement: ast::Stmt = { + IfStatement, + WhileStatement, + ForStatement, + TryStatement, + WithStatement, + FuncDef, + ClassDef, +}; + +IfStatement: ast::Stmt = { + "if" ":" => { + // Determine last else: + let mut last = s3.map(|s| s.2).unwrap_or_default(); + + // handle elif: + for i in s2.into_iter().rev() { + let x = ast::Stmt { + custom: (), + location: i.0, + node: ast::StmtKind::If { test: Box::new(i.2), body: i.4, orelse: last }, + }; + last = vec![x]; + } + + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::If { test: Box::new(test), body, orelse: last } + } + }, +}; + +WhileStatement: ast::Stmt = { + "while" ":" => { + let orelse = s2.map(|s| 
s.2).unwrap_or_default(); + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::While { + test: Box::new(test), + body, + orelse + }, + } + }, +}; + +ForStatement: ast::Stmt = { + "for" "in" ":" => { + let orelse = s2.map(|s| s.2).unwrap_or_default(); + let target = Box::new(target); + let iter = Box::new(iter); + let type_comment = None; + let node = if is_async.is_some() { + ast::StmtKind::AsyncFor { target, iter, body, orelse, type_comment } + } else { + ast::StmtKind::For { target, iter, body, orelse, type_comment } + }; + ast::Stmt::new(location, node) + }, +}; + +TryStatement: ast::Stmt = { + "try" ":" => { + let orelse = else_suite.map(|s| s.2).unwrap_or_default(); + let finalbody = finally.map(|s| s.2).unwrap_or_default(); + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Try { + body, + handlers, + orelse, + finalbody, + }, + } + }, + "try" ":" => { + let handlers = vec![]; + let orelse = vec![]; + let finalbody = finally.2; + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::Try { + body, + handlers, + orelse, + finalbody, + }, + } + }, +}; + +ExceptClause: ast::Excepthandler = { + "except" ":" => { + ast::Excepthandler::new( + location, + ast::ExcepthandlerKind::ExceptHandler { + type_: typ.map(Box::new), + name: None, + body, + }, + ) + }, + "except" ":" => { + ast::Excepthandler::new( + location, + ast::ExcepthandlerKind::ExceptHandler { + type_: Some(Box::new(x.0)), + name: Some(x.2), + body, + }, + ) + }, +}; + +WithStatement: ast::Stmt = { + "with" > ":" => { + let type_comment = None; + let node = if is_async.is_some() { + ast::StmtKind::AsyncWith { items, body, type_comment } + } else { + ast::StmtKind::With { items, body, type_comment } + }; + ast::Stmt::new(location, node) + }, +}; + +WithItem: ast::Withitem = { + => { + let optional_vars = n.map(|val| Box::new(val.1)); + let context_expr = Box::new(context_expr); + ast::Withitem { context_expr, optional_vars } + }, +}; + +FuncDef: ast::Stmt = { + "def" " Test)?> 
":" => { + let args = Box::new(args); + let returns = r.map(|x| Box::new(x.1)); + let type_comment = None; + let node = if is_async.is_some() { + ast::StmtKind::AsyncFunctionDef { name, args, body, decorator_list, returns, type_comment } + } else { + ast::StmtKind::FunctionDef { name, args, body, decorator_list, returns, type_comment } + }; + ast::Stmt::new(location, node) + }, +}; + +Parameters: ast::Arguments = { + "(" )?> ")" => { + a.unwrap_or_else(|| ast::Arguments { + posonlyargs: vec![], + args: vec![], + vararg: None, + kwonlyargs: vec![], + kw_defaults: vec![], + kwarg: None, + defaults: vec![] + }) + } +}; + +// Note that this is a macro which is used once for function defs, and +// once for lambda defs. +ParameterList: ast::Arguments = { + > )?> ","? =>? { + let (posonlyargs, args, defaults) = parse_params(param1)?; + + // Now gather rest of parameters: + let (vararg, kwonlyargs, kw_defaults, kwarg) = args2.map_or((None, vec![], vec![], None), |x| x.1); + + Ok(ast::Arguments { + posonlyargs, + args, + kwonlyargs, + vararg, + kwarg, + defaults, + kw_defaults, + }) + }, + > )> ","? =>? { + let (posonlyargs, args, defaults) = parse_params(param1)?; + + // Now gather rest of parameters: + let vararg = None; + let kwonlyargs = vec![]; + let kw_defaults = vec![]; + let kwarg = kw.1; + + Ok(ast::Arguments { + posonlyargs, + args, + kwonlyargs, + vararg, + kwarg, + defaults, + kw_defaults, + }) + }, + > ","? => { + let (vararg, kwonlyargs, kw_defaults, kwarg) = params; + ast::Arguments { + posonlyargs: vec![], + args: vec![], + kwonlyargs, + vararg, + kwarg, + defaults: vec![], + kw_defaults, + } + }, + > ","? => { + ast::Arguments { + posonlyargs: vec![], + args: vec![], + kwonlyargs: vec![], + vararg: None, + kwarg, + defaults: vec![], + kw_defaults: vec![], + } + }, +}; + +// Use inline here to make sure the "," is not creating an ambiguity. 
+#[inline] +ParameterDefs: (Vec<(ast::Arg, Option)>, Vec<(ast::Arg, Option)>) = { + >> => { + (vec![], args) + }, + >> "," "/" )*> => { + (pos_args, args.into_iter().map(|e| e.1).collect()) + }, +}; + +ParameterDef: (ast::Arg, Option) = { + => (i, None), + "=" => (i, Some(e)), +}; + +UntypedParameter: ast::Arg = { + => ast::Arg::new( + location, + ast::ArgData { arg, annotation: None, type_comment: None }, + ), +}; + +TypedParameter: ast::Arg = { + => { + let annotation = a.map(|x| Box::new(x.1)); + ast::Arg::new(location, ast::ArgData { arg, annotation, type_comment: None }) + }, +}; + +// Use inline here to make sure the "," is not creating an ambiguity. +// TODO: figure out another grammar that makes this inline no longer required. +#[inline] +ParameterListStarArgs: (Option>, Vec, Vec>>, Option>) = { + "*" )*> )?> => { + // Extract keyword arguments: + let mut kwonlyargs = vec![]; + let mut kw_defaults = vec![]; + for (name, value) in kw.into_iter().map(|x| x.1) { + kwonlyargs.push(name); + kw_defaults.push(value.map(Box::new)); + } + + let kwarg = kwarg.map(|n| n.1).flatten(); + let va = va.map(Box::new); + + (va, kwonlyargs, kw_defaults, kwarg) + } +}; + +KwargParameter: Option> = { + "**" => { + kwarg.map(Box::new) + } +}; + +ClassDef: ast::Stmt = { + "class" ":" => { + let (bases, keywords) = match a { + Some((_, arg, _)) => (arg.args, arg.keywords), + None => (vec![], vec![]), + }; + ast::Stmt { + custom: (), + location, + node: ast::StmtKind::ClassDef { + name, + bases, + keywords, + body, + decorator_list, + }, + } + }, +}; + +// Decorators: +Decorator: ast::Expr = { + "@" "\n" => { + p + }, +}; + +YieldExpr: ast::Expr = { + "yield" => ast::Expr { + location, + custom: (), + node: ast::ExprKind::Yield { value: value.map(Box::new) } + }, + "yield" "from" => ast::Expr { + location, + custom: (), + node: ast::ExprKind::YieldFrom { value: Box::new(e) } + }, +}; + +Test: ast::Expr = { + => { + if let Some(c) = condition { + ast::Expr { + location: c.0, + 
custom: (), + node: ast::ExprKind::IfExp { + test: Box::new(c.2), + body: Box::new(expr), + orelse: Box::new(c.4), + } + } + } else { + expr + } + }, + LambdaDef, +}; + +NamedExpressionTest: ast::Expr = { + ?> ":" => { + let p = p.unwrap_or_else(|| { + ast::Arguments { + posonlyargs: vec![], + args: vec![], + vararg: None, + kwonlyargs: vec![], + kw_defaults: vec![], + kwarg: None, + defaults: vec![] + } + }); + ast::Expr { + location, + custom: (), + node: ast::ExprKind::Lambda { + args: Box::new(p), + body: Box::new(body) + } + } + } +} + +OrTest: ast::Expr = { + => { + if e2.is_empty() { + e1 + } else { + let mut values = vec![e1]; + values.extend(e2.into_iter().map(|e| e.1)); + ast::Expr { + location, + custom: (), + node: ast::ExprKind::BoolOp { op: ast::Boolop::Or, values } + } + } + }, +}; + +AndTest: ast::Expr = { + => { + if e2.is_empty() { + e1 + } else { + let mut values = vec![e1]; + values.extend(e2.into_iter().map(|e| e.1)); + ast::Expr { + location, + custom: (), + node: ast::ExprKind::BoolOp { op: ast::Boolop::And, values } + } + } + }, +}; + +NotTest: ast::Expr = { + "not" => ast::Expr { + location, + custom: (), + node: ast::ExprKind::UnaryOp { operand: Box::new(e), op: ast::Unaryop::Not } + }, + Comparison, +}; + +Comparison: ast::Expr = { + => { + let (ops, comparators) = comparisons.into_iter().unzip(); + ast::Expr { + location, + custom: (), + node: ast::ExprKind::Compare { left: Box::new(left), ops, comparators } + } + }, + Expression, +}; + +CompOp: ast::Cmpop = { + "==" => ast::Cmpop::Eq, + "!=" => ast::Cmpop::NotEq, + "<" => ast::Cmpop::Lt, + "<=" => ast::Cmpop::LtE, + ">" => ast::Cmpop::Gt, + ">=" => ast::Cmpop::GtE, + "in" => ast::Cmpop::In, + "not" "in" => ast::Cmpop::NotIn, + "is" => ast::Cmpop::Is, + "is" "not" => ast::Cmpop::IsNot, +}; + +Expression: ast::Expr = { + "|" => ast::Expr { + location, + custom: (), + node: ast::ExprKind::BinOp { left: Box::new(e1), op: ast::Operator::BitOr, right: Box::new(e2) } + }, + XorExpression, +}; 
+ +XorExpression: ast::Expr = { + "^" => ast::Expr { + location, + custom: (), + node: ast::ExprKind::BinOp { left: Box::new(e1), op: ast::Operator::BitXor, right: Box::new(e2) } + }, + AndExpression, +}; + +AndExpression: ast::Expr = { + "&" => ast::Expr { + location, + custom: (), + node: ast::ExprKind::BinOp { left: Box::new(e1), op: ast::Operator::BitAnd, right: Box::new(e2) } + }, + ShiftExpression, +}; + +ShiftExpression: ast::Expr = { + => ast::Expr { + location, + custom: (), + node: ast::ExprKind::BinOp { left: Box::new(e1), op, right: Box::new(e2) } + }, + ArithmaticExpression, +}; + +ShiftOp: ast::Operator = { + "<<" => ast::Operator::LShift, + ">>" => ast::Operator::RShift, +}; + +ArithmaticExpression: ast::Expr = { + => ast::Expr { + location, + custom: (), + node: ast::ExprKind::BinOp { left: Box::new(a), op, right: Box::new(b) } + }, + Term, +}; + +AddOp: ast::Operator = { + "+" => ast::Operator::Add, + "-" => ast::Operator::Sub, +}; + +Term: ast::Expr = { + => ast::Expr { + location, + custom: (), + node: ast::ExprKind::BinOp { left: Box::new(a), op, right: Box::new(b) } + }, + Factor, +}; + +MulOp: ast::Operator = { + "*" => ast::Operator::Mult, + "/" => ast::Operator::Div, + "//" => ast::Operator::FloorDiv, + "%" => ast::Operator::Mod, + "@" => ast::Operator::MatMult, +}; + +Factor: ast::Expr = { + => ast::Expr { + location, + custom: (), + node: ast::ExprKind::UnaryOp { operand: Box::new(e), op } + }, + Power, +}; + +UnaryOp: ast::Unaryop = { + "+" => ast::Unaryop::UAdd, + "-" => ast::Unaryop::USub, + "~" => ast::Unaryop::Invert, +}; + +Power: ast::Expr = { + => { + match e2 { + None => e, + Some((location, _, b)) => ast::Expr { + location, + custom: (), + node: ast::ExprKind::BinOp { left: Box::new(e), op: ast::Operator::Pow, right: Box::new(b) } + }, + } + } +}; + +AtomExpr: ast::Expr = { + => { + if is_await.is_some() { + ast::Expr { + location, + custom: (), + node: ast::ExprKind::Await { value: Box::new(atom) } + } + } else { + atom + } + } 
+} + +AtomExpr2: ast::Expr = { + Atom, + "(" ")" => { + ast::Expr { + location, + custom: (), + node: ast::ExprKind::Call { func: Box::new(f), args: a.args, keywords: a.keywords } + } + }, + "[" "]" => ast::Expr { + location, + custom: (), + node: ast::ExprKind::Subscript { value: Box::new(e), slice: Box::new(s), ctx: ast::ExprContext::Load } + }, + "." => ast::Expr { + location, + custom: (), + node: ast::ExprKind::Attribute { value: Box::new(e), attr, ctx: ast::ExprContext::Load } + }, +}; + +SubscriptList: ast::Expr = { + ","? => { + if s2.is_empty() { + s1 + } else { + let mut dims = vec![s1]; + for x in s2 { + dims.push(x.1) + } + + ast::Expr { + location, + custom: (), + node: ast::ExprKind::Tuple { elts: dims, ctx: ast::ExprContext::Load }, + } + } + } +}; + +Subscript: ast::Expr = { + Test, + ":" => { + let lower = e1.map(Box::new); + let upper = e2.map(Box::new); + let step = e3.flatten().map(Box::new); + ast::Expr { + location, + custom: (), + node: ast::ExprKind::Slice { lower, upper, step } + } + } +}; + +SliceOp: Option = { + ":" => e, +} + +Atom: ast::Expr = { + =>? 
{ + let values = s.into_iter().map(|(loc, (value, is_fstring))| { + if is_fstring { + parse_located_fstring(&value, loc) + } else { + Ok(ast::Expr::new( + loc, + ast::ExprKind::Constant { value: value.into(), kind: None }, + )) + } + }); + let values = values.collect::, _>>()?; + + Ok(if values.len() > 1 { + ast::Expr::new(location, ast::ExprKind::JoinedStr { values }) + } else { + values.into_iter().next().unwrap() + }) + }, + => ast::Expr { + location, + custom: (), + node: ast::ExprKind::Constant { value, kind: None } + }, + => ast::Expr { + location, + custom: (), + node: ast::ExprKind::Name { id: name, ctx: ast::ExprContext::Load } + }, + "[" "]" => { + let elts = e.unwrap_or_default(); + ast::Expr { + location, + custom: (), + node: ast::ExprKind::List { elts, ctx: ast::ExprContext::Load } + } + }, + "[" "]" => { + ast::Expr { + location, + custom: (), + node: ast::ExprKind::ListComp { elt: Box::new(elt), generators } + } + }, + "(" ")" => { + elements.unwrap_or(ast::Expr { + location, + custom: (), + node: ast::ExprKind::Tuple { elts: Vec::new(), ctx: ast::ExprContext::Load } + }) + }, + "(" ")" => e, + "(" ")" => { + ast::Expr { + location, + custom: (), + node: ast::ExprKind::GeneratorExp { elt: Box::new(elt), generators } + } + }, + "{" "}" => { + let (keys, values) = e.unwrap_or_default(); + ast::Expr { + location, + custom: (), + node: ast::ExprKind::Dict { keys, values } + } + }, + "{" "}" => { + ast::Expr { + location, + custom: (), + node: ast::ExprKind::DictComp { + key: Box::new(e1.0), + value: Box::new(e1.1), + generators, + } + } + }, + "{" "}" => ast::Expr { + location, + custom: (), + node: ast::ExprKind::Set { elts } + }, + "{" "}" => { + ast::Expr { + location, + custom: (), + node: ast::ExprKind::SetComp { elt: Box::new(elt), generators } + } + }, + "True" => ast::Expr::new(location, ast::ExprKind::Constant { value: true.into(), kind: None }), + "False" => ast::Expr::new(location, ast::ExprKind::Constant { value: false.into(), kind: None }), 
+ "None" => ast::Expr::new(location, ast::ExprKind::Constant { value: ast::Constant::None, kind: None }), + "..." => ast::Expr::new(location, ast::ExprKind::Constant { value: ast::Constant::Ellipsis, kind: None }), +}; + +ListLiteralValues: Vec = { + > ","? => e, +}; + +DictLiteralValues: (Vec>>, Vec) = { + > ","? => elements.into_iter().unzip(), +}; + +DictEntry: (ast::Expr, ast::Expr) = { + ":" => (e1, e2), +}; + +DictElement: (Option>, ast::Expr) = { + => (Some(Box::new(e.0)), e.1), + "**" => (None, e), +}; + +SetLiteralValues: Vec = { + > ","? => e1 +}; + +ExpressionOrStarExpression = { + Expression, + StarExpr +}; + +ExpressionList: ast::Expr = { + GenericList +}; + +ExpressionList2: Vec = { + > ","? => elements, +}; + +// A test list is one of: +// - a list of expressions +// - a single expression +// - a single expression followed by a trailing comma +#[inline] +TestList: ast::Expr = { + GenericList +}; + +GenericList: ast::Expr = { + > => { + if elts.len() == 1 && trailing_comma.is_none() { + elts.into_iter().next().unwrap() + } else { + ast::Expr { + location, + custom: (), + node: ast::ExprKind::Tuple { elts, ctx: ast::ExprContext::Load } + } + } + } +} + +// Test +StarExpr: ast::Expr = { + "*" => ast::Expr { + location, + custom: (), + node: ast::ExprKind::Starred { value: Box::new(e), ctx: ast::ExprContext::Load }, + } +}; + +// Comprehensions: +CompFor: Vec = => c; + +SingleForComprehension: ast::Comprehension = { + "for" "in" => { + let is_async = is_async.is_some(); + ast::Comprehension { + target: Box::new(target), + iter: Box::new(iter), + ifs, + is_async + } + } +}; + +ExpressionNoCond: ast::Expr = OrTest; +ComprehensionIf: ast::Expr = "if" => c; + +ArgumentList: ArgumentList = { + > =>? 
{ + let arg_list = parse_args(e)?; + Ok(arg_list) + } +}; + +FunctionArgument: (Option<(ast::Location, Option)>, ast::Expr) = { + => { + let expr = match c { + Some(c) => ast::Expr { + location: e.location, + custom: (), + node: ast::ExprKind::GeneratorExp { + elt: Box::new(e), + generators: c, + } + }, + None => e, + }; + (None, expr) + }, + "=" => (Some((location, Some(i))), e), + "*" => { + let expr = ast::Expr::new( + location, + ast::ExprKind::Starred { value: Box::new(e), ctx: ast::ExprContext::Load }, + ); + (None, expr) + }, + "**" => (Some((location, None)), e), +}; + +#[inline] +Comma: Vec = { + ",")*> => { + let mut items = items; + items.extend(last); + items + } +}; + +#[inline] +OneOrMore: Vec = { + => { + let mut items = vec![i1]; + items.extend(i2.into_iter().map(|e| e.1)); + items + } +}; + +Constant: ast::Constant = { + => ast::Constant::Bytes(b.into_iter().flatten().collect()), + => ast::Constant::Int(value), + => ast::Constant::Float(value), + => ast::Constant::Complex { real: s.0, imag: s.1 }, +}; + +Bytes: Vec = { + => { + s.into_iter().flatten().collect::>() + }, +}; + +Identifier: String = => s; + +// Hook external lexer: +extern { + type Location = ast::Location; + type Error = LexicalError; + + enum lexer::Tok { + Indent => lexer::Tok::Indent, + Dedent => lexer::Tok::Dedent, + StartModule => lexer::Tok::StartModule, + StartInteractive => lexer::Tok::StartInteractive, + StartExpression => lexer::Tok::StartExpression, + "+" => lexer::Tok::Plus, + "-" => lexer::Tok::Minus, + "~" => lexer::Tok::Tilde, + ":" => lexer::Tok::Colon, + "." => lexer::Tok::Dot, + "..." 
=> lexer::Tok::Ellipsis, + "," => lexer::Tok::Comma, + "*" => lexer::Tok::Star, + "**" => lexer::Tok::DoubleStar, + "&" => lexer::Tok::Amper, + "@" => lexer::Tok::At, + "%" => lexer::Tok::Percent, + "//" => lexer::Tok::DoubleSlash, + "^" => lexer::Tok::CircumFlex, + "|" => lexer::Tok::Vbar, + "<<" => lexer::Tok::LeftShift, + ">>" => lexer::Tok::RightShift, + "/" => lexer::Tok::Slash, + "(" => lexer::Tok::Lpar, + ")" => lexer::Tok::Rpar, + "[" => lexer::Tok::Lsqb, + "]" => lexer::Tok::Rsqb, + "{" => lexer::Tok::Lbrace, + "}" => lexer::Tok::Rbrace, + "=" => lexer::Tok::Equal, + "+=" => lexer::Tok::PlusEqual, + "-=" => lexer::Tok::MinusEqual, + "*=" => lexer::Tok::StarEqual, + "@=" => lexer::Tok::AtEqual, + "/=" => lexer::Tok::SlashEqual, + "%=" => lexer::Tok::PercentEqual, + "&=" => lexer::Tok::AmperEqual, + "|=" => lexer::Tok::VbarEqual, + "^=" => lexer::Tok::CircumflexEqual, + "<<=" => lexer::Tok::LeftShiftEqual, + ">>=" => lexer::Tok::RightShiftEqual, + "**=" => lexer::Tok::DoubleStarEqual, + "//=" => lexer::Tok::DoubleSlashEqual, + ":=" => lexer::Tok::ColonEqual, + "==" => lexer::Tok::EqEqual, + "!=" => lexer::Tok::NotEqual, + "<" => lexer::Tok::Less, + "<=" => lexer::Tok::LessEqual, + ">" => lexer::Tok::Greater, + ">=" => lexer::Tok::GreaterEqual, + "->" => lexer::Tok::Rarrow, + "and" => lexer::Tok::And, + "as" => lexer::Tok::As, + "assert" => lexer::Tok::Assert, + "async" => lexer::Tok::Async, + "await" => lexer::Tok::Await, + "break" => lexer::Tok::Break, + "class" => lexer::Tok::Class, + "continue" => lexer::Tok::Continue, + "def" => lexer::Tok::Def, + "del" => lexer::Tok::Del, + "elif" => lexer::Tok::Elif, + "else" => lexer::Tok::Else, + "except" => lexer::Tok::Except, + "finally" => lexer::Tok::Finally, + "for" => lexer::Tok::For, + "from" => lexer::Tok::From, + "global" => lexer::Tok::Global, + "if" => lexer::Tok::If, + "in" => lexer::Tok::In, + "is" => lexer::Tok::Is, + "import" => lexer::Tok::Import, + "from" => lexer::Tok::From, + "lambda" => 
lexer::Tok::Lambda, + "nonlocal" => lexer::Tok::Nonlocal, + "not" => lexer::Tok::Not, + "or" => lexer::Tok::Or, + "pass" => lexer::Tok::Pass, + "raise" => lexer::Tok::Raise, + "return" => lexer::Tok::Return, + "try" => lexer::Tok::Try, + "while" => lexer::Tok::While, + "with" => lexer::Tok::With, + "yield" => lexer::Tok::Yield, + "True" => lexer::Tok::True, + "False" => lexer::Tok::False, + "None" => lexer::Tok::None, + int => lexer::Tok::Int { value: }, + float => lexer::Tok::Float { value: }, + complex => lexer::Tok::Complex { real: , imag: }, + string => lexer::Tok::String { value: , is_fstring: }, + bytes => lexer::Tok::Bytes { value: > }, + name => lexer::Tok::Name { name: }, + "\n" => lexer::Tok::Newline, + ";" => lexer::Tok::Semi, + } +} diff --git a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__fstring_parse_selfdocumenting_base.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__fstring_parse_selfdocumenting_base.snap new file mode 100644 index 00000000..33156007 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__fstring_parse_selfdocumenting_base.snap @@ -0,0 +1,63 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "user=", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "user", + ctx: Load, + }, + }, + conversion: None, + format_spec: None, + }, + }, + ], + }, +} diff --git 
a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__fstring_parse_selfdocumenting_base_more.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__fstring_parse_selfdocumenting_base_more.snap new file mode 100644 index 00000000..6cc933de --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__fstring_parse_selfdocumenting_base_more.snap @@ -0,0 +1,137 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "mix ", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "user=", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "user", + ctx: Load, + }, + }, + conversion: None, + format_spec: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + " with text and ", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "second=", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "second", + ctx: Load, + }, 
+ }, + conversion: None, + format_spec: None, + }, + }, + ], + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__fstring_parse_selfdocumenting_format.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__fstring_parse_selfdocumenting_format.snap new file mode 100644 index 00000000..e0713f5e --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__fstring_parse_selfdocumenting_format.snap @@ -0,0 +1,77 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "user=", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "user", + ctx: Load, + }, + }, + conversion: None, + format_spec: Some( + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + ">10", + ), + kind: None, + }, + }, + ), + }, + }, + ], + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_empty_fstring.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_empty_fstring.snap new file mode 100644 index 00000000..85063867 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_empty_fstring.snap @@ -0,0 +1,17 @@ +--- +source: parser/src/fstring.rs +expression: "parse_fstring(\"\").unwrap()" +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "", + ), + kind: None, + }, +} diff --git 
a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring.snap new file mode 100644 index 00000000..457853a5 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring.snap @@ -0,0 +1,72 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "a", + ctx: Load, + }, + }, + conversion: None, + format_spec: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: Name { + id: "b", + ctx: Load, + }, + }, + conversion: None, + format_spec: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "{foo}", + ), + kind: None, + }, + }, + ], + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_equals.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_equals.snap new file mode 100644 index 00000000..c1215ed6 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_equals.snap @@ -0,0 +1,56 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast + +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 5, + }, + custom: (), + node: Compare { + left: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Constant { + value: Int( + 42, + ), + kind: None, + 
}, + }, + ops: [ + Eq, + ], + comparators: [ + Located { + location: Location { + row: 1, + column: 8, + }, + custom: (), + node: Constant { + value: Int( + 42, + ), + kind: None, + }, + }, + ], + }, + }, + conversion: None, + format_spec: None, + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_nested_spec.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_nested_spec.snap new file mode 100644 index 00000000..8ecd704b --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_nested_spec.snap @@ -0,0 +1,49 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "foo", + ctx: Load, + }, + }, + conversion: None, + format_spec: Some( + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "spec", + ctx: Load, + }, + }, + conversion: None, + format_spec: None, + }, + }, + ), + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_not_equals.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_not_equals.snap new file mode 100644 index 00000000..74bf50b5 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_not_equals.snap @@ -0,0 +1,56 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast + +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 4, + }, + custom: (), + node: Compare { + left: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: 
Constant { + value: Int( + 1, + ), + kind: None, + }, + }, + ops: [ + NotEq, + ], + comparators: [ + Located { + location: Location { + row: 1, + column: 7, + }, + custom: (), + node: Constant { + value: Int( + 2, + ), + kind: None, + }, + }, + ], + }, + }, + conversion: None, + format_spec: None, + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_not_nested_spec.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_not_nested_spec.snap new file mode 100644 index 00000000..12147ebb --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_not_nested_spec.snap @@ -0,0 +1,40 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "foo", + ctx: Load, + }, + }, + conversion: None, + format_spec: Some( + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "spec", + ), + kind: None, + }, + }, + ), + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_selfdoc_prec_space.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_selfdoc_prec_space.snap new file mode 100644 index 00000000..4b9b4458 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_selfdoc_prec_space.snap @@ -0,0 +1,63 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "x =", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant 
{ + value: Str( + "", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "x", + ctx: Load, + }, + }, + conversion: None, + format_spec: None, + }, + }, + ], + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_selfdoc_trailing_space.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_selfdoc_trailing_space.snap new file mode 100644 index 00000000..09c0f32d --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_selfdoc_trailing_space.snap @@ -0,0 +1,63 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast +--- +Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + "x=", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: Constant { + value: Str( + " ", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 0, + column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "x", + ctx: Load, + }, + }, + conversion: None, + format_spec: None, + }, + }, + ], + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_yield_expr.snap b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_yield_expr.snap new file mode 100644 index 00000000..2330059a --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__fstring__tests__parse_fstring_yield_expr.snap @@ -0,0 +1,25 @@ +--- +source: parser/src/fstring.rs +expression: parse_ast +--- +Located { + location: Location { + row: 0, + 
column: 0, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Yield { + value: None, + }, + }, + conversion: None, + format_spec: None, + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_class.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_class.snap new file mode 100644 index 00000000..869669e9 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_class.snap @@ -0,0 +1,160 @@ +--- +source: parser/src/parser.rs +expression: parse_program(&source).unwrap() +--- +[ + Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: ClassDef { + name: "Foo", + bases: [ + Located { + location: Location { + row: 1, + column: 11, + }, + custom: (), + node: Name { + id: "A", + ctx: Load, + }, + }, + Located { + location: Location { + row: 1, + column: 14, + }, + custom: (), + node: Name { + id: "B", + ctx: Load, + }, + }, + ], + keywords: [], + body: [ + Located { + location: Location { + row: 2, + column: 2, + }, + custom: (), + node: FunctionDef { + name: "__init__", + args: Arguments { + posonlyargs: [], + args: [ + Located { + location: Location { + row: 2, + column: 15, + }, + custom: (), + node: ArgData { + arg: "self", + annotation: None, + type_comment: None, + }, + }, + ], + vararg: None, + kwonlyargs: [], + kw_defaults: [], + kwarg: None, + defaults: [], + }, + body: [ + Located { + location: Location { + row: 3, + column: 3, + }, + custom: (), + node: Pass, + }, + ], + decorator_list: [], + returns: None, + type_comment: None, + }, + }, + Located { + location: Location { + row: 4, + column: 2, + }, + custom: (), + node: FunctionDef { + name: "method_with_default", + args: Arguments { + posonlyargs: [], + args: [ + Located { + location: Location { + row: 4, + column: 26, + }, + custom: (), + node: ArgData { + arg: "self", + annotation: None, + type_comment: None, + }, + }, 
+ Located { + location: Location { + row: 4, + column: 32, + }, + custom: (), + node: ArgData { + arg: "arg", + annotation: None, + type_comment: None, + }, + }, + ], + vararg: None, + kwonlyargs: [], + kw_defaults: [], + kwarg: None, + defaults: [ + Located { + location: Location { + row: 4, + column: 37, + }, + custom: (), + node: Constant { + value: Str( + "default", + ), + kind: None, + }, + }, + ], + }, + body: [ + Located { + location: Location { + row: 5, + column: 3, + }, + custom: (), + node: Pass, + }, + ], + decorator_list: [], + returns: None, + type_comment: None, + }, + }, + ], + decorator_list: [], + }, + }, +] diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_dict_comprehension.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_dict_comprehension.snap new file mode 100644 index 00000000..65369dc3 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_dict_comprehension.snap @@ -0,0 +1,63 @@ +--- +source: parser/src/parser.rs +expression: parse_ast +--- +Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: DictComp { + key: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "x1", + ctx: Load, + }, + }, + value: Located { + location: Location { + row: 1, + column: 6, + }, + custom: (), + node: Name { + id: "x2", + ctx: Load, + }, + }, + generators: [ + Comprehension { + target: Located { + location: Location { + row: 1, + column: 13, + }, + custom: (), + node: Name { + id: "y", + ctx: Load, + }, + }, + iter: Located { + location: Location { + row: 1, + column: 18, + }, + custom: (), + node: Name { + id: "z", + ctx: Load, + }, + }, + ifs: [], + is_async: false, + }, + ], + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_double_list_comprehension.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_double_list_comprehension.snap new file mode 
100644 index 00000000..4095f648 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_double_list_comprehension.snap @@ -0,0 +1,179 @@ +--- +source: parser/src/parser.rs +expression: parse_ast + +--- +Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: ListComp { + elt: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "x", + ctx: Load, + }, + }, + generators: [ + Comprehension { + target: Located { + location: Location { + row: 1, + column: 8, + }, + custom: (), + node: Tuple { + elts: [ + Located { + location: Location { + row: 1, + column: 8, + }, + custom: (), + node: Name { + id: "y", + ctx: Load, + }, + }, + Located { + location: Location { + row: 1, + column: 11, + }, + custom: (), + node: Name { + id: "y2", + ctx: Load, + }, + }, + ], + ctx: Load, + }, + }, + iter: Located { + location: Location { + row: 1, + column: 17, + }, + custom: (), + node: Name { + id: "z", + ctx: Load, + }, + }, + ifs: [], + is_async: false, + }, + Comprehension { + target: Located { + location: Location { + row: 1, + column: 23, + }, + custom: (), + node: Name { + id: "a", + ctx: Load, + }, + }, + iter: Located { + location: Location { + row: 1, + column: 28, + }, + custom: (), + node: Name { + id: "b", + ctx: Load, + }, + }, + ifs: [ + Located { + location: Location { + row: 1, + column: 35, + }, + custom: (), + node: Compare { + left: Located { + location: Location { + row: 1, + column: 33, + }, + custom: (), + node: Name { + id: "a", + ctx: Load, + }, + }, + ops: [ + Lt, + ], + comparators: [ + Located { + location: Location { + row: 1, + column: 37, + }, + custom: (), + node: Constant { + value: Int( + 5, + ), + kind: None, + }, + }, + ], + }, + }, + Located { + location: Location { + row: 1, + column: 44, + }, + custom: (), + node: Compare { + left: Located { + location: Location { + row: 1, + column: 42, + }, + custom: (), + node: Name { + id: "a", + ctx: Load, + }, + 
}, + ops: [ + Gt, + ], + comparators: [ + Located { + location: Location { + row: 1, + column: 46, + }, + custom: (), + node: Constant { + value: Int( + 10, + ), + kind: None, + }, + }, + ], + }, + }, + ], + is_async: false, + }, + ], + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_empty.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_empty.snap new file mode 100644 index 00000000..64d89c15 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_empty.snap @@ -0,0 +1,5 @@ +--- +source: parser/src/parser.rs +expression: parse_ast +--- +[] diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_if_elif_else.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_if_elif_else.snap new file mode 100644 index 00000000..4b34c0e4 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_if_elif_else.snap @@ -0,0 +1,125 @@ +--- +source: parser/src/parser.rs +expression: parse_ast + +--- +[ + Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: If { + test: Located { + location: Location { + row: 1, + column: 4, + }, + custom: (), + node: Constant { + value: Int( + 1, + ), + kind: None, + }, + }, + body: [ + Located { + location: Location { + row: 1, + column: 7, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 7, + }, + custom: (), + node: Constant { + value: Int( + 10, + ), + kind: None, + }, + }, + }, + }, + ], + orelse: [ + Located { + location: Location { + row: 2, + column: 1, + }, + custom: (), + node: If { + test: Located { + location: Location { + row: 2, + column: 6, + }, + custom: (), + node: Constant { + value: Int( + 2, + ), + kind: None, + }, + }, + body: [ + Located { + location: Location { + row: 2, + column: 9, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 2, + column: 9, + }, + custom: (), + 
node: Constant { + value: Int( + 20, + ), + kind: None, + }, + }, + }, + }, + ], + orelse: [ + Located { + location: Location { + row: 3, + column: 7, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 3, + column: 7, + }, + custom: (), + node: Constant { + value: Int( + 30, + ), + kind: None, + }, + }, + }, + }, + ], + }, + }, + ], + }, + }, +] diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_kwargs.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_kwargs.snap new file mode 100644 index 00000000..d31a3921 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_kwargs.snap @@ -0,0 +1,78 @@ +--- +source: parser/src/parser.rs +expression: parse_ast + +--- +[ + Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 8, + }, + custom: (), + node: Call { + func: Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Name { + id: "my_func", + ctx: Load, + }, + }, + args: [ + Located { + location: Location { + row: 1, + column: 10, + }, + custom: (), + node: Constant { + value: Str( + "positional", + ), + kind: None, + }, + }, + ], + keywords: [ + Located { + location: Location { + row: 1, + column: 23, + }, + custom: (), + node: KeywordData { + arg: Some( + "keyword", + ), + value: Located { + location: Location { + row: 1, + column: 31, + }, + custom: (), + node: Constant { + value: Int( + 2, + ), + kind: None, + }, + }, + }, + }, + ], + }, + }, + }, + }, +] diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_lambda.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_lambda.snap new file mode 100644 index 00000000..2f70292d --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_lambda.snap @@ -0,0 +1,90 @@ +--- +source: parser/src/parser.rs +expression: parse_ast 
+--- +[ + Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Lambda { + args: Arguments { + posonlyargs: [], + args: [ + Located { + location: Location { + row: 1, + column: 8, + }, + custom: (), + node: ArgData { + arg: "x", + annotation: None, + type_comment: None, + }, + }, + Located { + location: Location { + row: 1, + column: 11, + }, + custom: (), + node: ArgData { + arg: "y", + annotation: None, + type_comment: None, + }, + }, + ], + vararg: None, + kwonlyargs: [], + kw_defaults: [], + kwarg: None, + defaults: [], + }, + body: Located { + location: Location { + row: 1, + column: 16, + }, + custom: (), + node: BinOp { + left: Located { + location: Location { + row: 1, + column: 14, + }, + custom: (), + node: Name { + id: "x", + ctx: Load, + }, + }, + op: Mult, + right: Located { + location: Location { + row: 1, + column: 18, + }, + custom: (), + node: Name { + id: "y", + ctx: Load, + }, + }, + }, + }, + }, + }, + }, + }, +] diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_list_comprehension.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_list_comprehension.snap new file mode 100644 index 00000000..d67e166d --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_list_comprehension.snap @@ -0,0 +1,52 @@ +--- +source: parser/src/parser.rs +expression: parse_ast +--- +Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: ListComp { + elt: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Name { + id: "x", + ctx: Load, + }, + }, + generators: [ + Comprehension { + target: Located { + location: Location { + row: 1, + column: 8, + }, + custom: (), + node: Name { + id: "y", + ctx: Load, + }, + }, + iter: Located { + location: Location { + row: 1, + column: 13, + }, + custom: (), + node: Name { + 
id: "z", + ctx: Load, + }, + }, + ifs: [], + is_async: false, + }, + ], + }, +} diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_print_2.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_print_2.snap new file mode 100644 index 00000000..6fe4cad3 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_print_2.snap @@ -0,0 +1,65 @@ +--- +source: parser/src/parser.rs +expression: parse_ast + +--- +[ + Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 6, + }, + custom: (), + node: Call { + func: Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Name { + id: "print", + ctx: Load, + }, + }, + args: [ + Located { + location: Location { + row: 1, + column: 8, + }, + custom: (), + node: Constant { + value: Str( + "Hello world", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 1, + column: 22, + }, + custom: (), + node: Constant { + value: Int( + 2, + ), + kind: None, + }, + }, + ], + keywords: [], + }, + }, + }, + }, +] diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_print_hello.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_print_hello.snap new file mode 100644 index 00000000..b4e78d8d --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_print_hello.snap @@ -0,0 +1,51 @@ +--- +source: parser/src/parser.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 6, + }, + custom: (), + node: Call { + func: Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Name { + id: "print", + ctx: Load, + }, + }, + args: [ + Located { + location: Location { + row: 1, + column: 8, + }, + custom: (), + node: 
Constant { + value: Str( + "Hello world", + ), + kind: None, + }, + }, + ], + keywords: [], + }, + }, + }, + }, +] diff --git a/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_tuples.snap b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_tuples.snap new file mode 100644 index 00000000..26243f56 --- /dev/null +++ b/nac3parser/src/snapshots/rustpython_parser__parser__tests__parse_tuples.snap @@ -0,0 +1,91 @@ +--- +source: parser/src/parser.rs +expression: parse_program(&source).unwrap() + +--- +[ + Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Assign { + targets: [ + Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Tuple { + elts: [ + Located { + location: Location { + row: 1, + column: 1, + }, + custom: (), + node: Name { + id: "a", + ctx: Load, + }, + }, + Located { + location: Location { + row: 1, + column: 4, + }, + custom: (), + node: Name { + id: "b", + ctx: Load, + }, + }, + ], + ctx: Load, + }, + }, + ], + value: Located { + location: Location { + row: 1, + column: 8, + }, + custom: (), + node: Tuple { + elts: [ + Located { + location: Location { + row: 1, + column: 8, + }, + custom: (), + node: Constant { + value: Int( + 4, + ), + kind: None, + }, + }, + Located { + location: Location { + row: 1, + column: 11, + }, + custom: (), + node: Constant { + value: Int( + 5, + ), + kind: None, + }, + }, + ], + ctx: Load, + }, + }, + type_comment: None, + }, + }, +] diff --git a/nac3parser/src/token.rs b/nac3parser/src/token.rs new file mode 100644 index 00000000..1cedc66a --- /dev/null +++ b/nac3parser/src/token.rs @@ -0,0 +1,227 @@ +//! Different token definitions. +//! Loosely based on token.h from CPython source: +use num_bigint::BigInt; +use std::fmt::{self, Write}; + +/// Python source code can be tokenized in a sequence of these tokens. 
+#[derive(Clone, Debug, PartialEq)] +pub enum Tok { + Name { name: String }, + Int { value: BigInt }, + Float { value: f64 }, + Complex { real: f64, imag: f64 }, + String { value: String, is_fstring: bool }, + Bytes { value: Vec }, + Newline, + Indent, + Dedent, + StartModule, + StartInteractive, + StartExpression, + EndOfFile, + Lpar, + Rpar, + Lsqb, + Rsqb, + Colon, + Comma, + Semi, + Plus, + Minus, + Star, + Slash, + Vbar, // '|' + Amper, // '&' + Less, + Greater, + Equal, + Dot, + Percent, + Lbrace, + Rbrace, + EqEqual, + NotEqual, + LessEqual, + GreaterEqual, + Tilde, + CircumFlex, + LeftShift, + RightShift, + DoubleStar, + DoubleStarEqual, // '**=' + PlusEqual, + MinusEqual, + StarEqual, + SlashEqual, + PercentEqual, + AmperEqual, // '&=' + VbarEqual, + CircumflexEqual, // '^=' + LeftShiftEqual, + RightShiftEqual, + DoubleSlash, // '//' + DoubleSlashEqual, + ColonEqual, + At, + AtEqual, + Rarrow, + Ellipsis, + + // Keywords (alphabetically): + False, + None, + True, + + And, + As, + Assert, + Async, + Await, + Break, + Class, + Continue, + Def, + Del, + Elif, + Else, + Except, + Finally, + For, + From, + Global, + If, + Import, + In, + Is, + Lambda, + Nonlocal, + Not, + Or, + Pass, + Raise, + Return, + Try, + While, + With, + Yield, +} + +impl fmt::Display for Tok { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use Tok::*; + match self { + Name { name } => write!(f, "'{}'", name), + Int { value } => write!(f, "'{}'", value), + Float { value } => write!(f, "'{}'", value), + Complex { real, imag } => write!(f, "{}j{}", real, imag), + String { value, is_fstring } => { + if *is_fstring { + write!(f, "f")? 
+ } + write!(f, "{:?}", value) + } + Bytes { value } => { + write!(f, "b\"")?; + for i in value { + match i { + 9 => f.write_str("\\t")?, + 10 => f.write_str("\\n")?, + 13 => f.write_str("\\r")?, + 32..=126 => f.write_char(*i as char)?, + _ => write!(f, "\\x{:02x}", i)?, + } + } + f.write_str("\"") + } + Newline => f.write_str("Newline"), + Indent => f.write_str("Indent"), + Dedent => f.write_str("Dedent"), + StartModule => f.write_str("StartProgram"), + StartInteractive => f.write_str("StartInteractive"), + StartExpression => f.write_str("StartExpression"), + EndOfFile => f.write_str("EOF"), + Lpar => f.write_str("'('"), + Rpar => f.write_str("')'"), + Lsqb => f.write_str("'['"), + Rsqb => f.write_str("']'"), + Colon => f.write_str("':'"), + Comma => f.write_str("','"), + Semi => f.write_str("';'"), + Plus => f.write_str("'+'"), + Minus => f.write_str("'-'"), + Star => f.write_str("'*'"), + Slash => f.write_str("'/'"), + Vbar => f.write_str("'|'"), + Amper => f.write_str("'&'"), + Less => f.write_str("'<'"), + Greater => f.write_str("'>'"), + Equal => f.write_str("'='"), + Dot => f.write_str("'.'"), + Percent => f.write_str("'%'"), + Lbrace => f.write_str("'{'"), + Rbrace => f.write_str("'}'"), + EqEqual => f.write_str("'=='"), + NotEqual => f.write_str("'!='"), + LessEqual => f.write_str("'<='"), + GreaterEqual => f.write_str("'>='"), + Tilde => f.write_str("'~'"), + CircumFlex => f.write_str("'^'"), + LeftShift => f.write_str("'<<'"), + RightShift => f.write_str("'>>'"), + DoubleStar => f.write_str("'**'"), + DoubleStarEqual => f.write_str("'**='"), + PlusEqual => f.write_str("'+='"), + MinusEqual => f.write_str("'-='"), + StarEqual => f.write_str("'*='"), + SlashEqual => f.write_str("'/='"), + PercentEqual => f.write_str("'%='"), + AmperEqual => f.write_str("'&='"), + VbarEqual => f.write_str("'|='"), + CircumflexEqual => f.write_str("'^='"), + LeftShiftEqual => f.write_str("'<<='"), + RightShiftEqual => f.write_str("'>>='"), + DoubleSlash => 
f.write_str("'//'"), + DoubleSlashEqual => f.write_str("'//='"), + At => f.write_str("'@'"), + AtEqual => f.write_str("'@='"), + Rarrow => f.write_str("'->'"), + Ellipsis => f.write_str("'...'"), + False => f.write_str("'False'"), + None => f.write_str("'None'"), + True => f.write_str("'True'"), + And => f.write_str("'and'"), + As => f.write_str("'as'"), + Assert => f.write_str("'assert'"), + Async => f.write_str("'async'"), + Await => f.write_str("'await'"), + Break => f.write_str("'break'"), + Class => f.write_str("'class'"), + Continue => f.write_str("'continue'"), + Def => f.write_str("'def'"), + Del => f.write_str("'del'"), + Elif => f.write_str("'elif'"), + Else => f.write_str("'else'"), + Except => f.write_str("'except'"), + Finally => f.write_str("'finally'"), + For => f.write_str("'for'"), + From => f.write_str("'from'"), + Global => f.write_str("'global'"), + If => f.write_str("'if'"), + Import => f.write_str("'import'"), + In => f.write_str("'in'"), + Is => f.write_str("'is'"), + Lambda => f.write_str("'lambda'"), + Nonlocal => f.write_str("'nonlocal'"), + Not => f.write_str("'not'"), + Or => f.write_str("'or'"), + Pass => f.write_str("'pass'"), + Raise => f.write_str("'raise'"), + Return => f.write_str("'return'"), + Try => f.write_str("'try'"), + While => f.write_str("'while'"), + With => f.write_str("'with'"), + Yield => f.write_str("'yield'"), + ColonEqual => f.write_str("':='"), + } + } +}