rustpython-parser: string interner, optimizations, thread local cache
corresponds to M-Labs RustPython fork at efdf7829ba1a5f87d30df8eaff12a330544f3cbd branch parser-mod
This commit is contained in:
parent
80c7bc1cbd
commit
48ce6bb6c5
|
@ -11,3 +11,7 @@ fold = []
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
num-bigint = "0.4.0"
|
num-bigint = "0.4.0"
|
||||||
|
lazy_static = "1.4.0"
|
||||||
|
parking_lot = "0.11.1"
|
||||||
|
string-interner = "0.13.0"
|
||||||
|
fxhash = "0.2.1"
|
||||||
|
|
|
@ -185,7 +185,7 @@ class StructVisitor(TypeInfoEmitVisitor):
|
||||||
self.sum_with_constructors(sum, name, depth)
|
self.sum_with_constructors(sum, name, depth)
|
||||||
|
|
||||||
def emit_attrs(self, depth):
|
def emit_attrs(self, depth):
|
||||||
self.emit("#[derive(Debug, PartialEq)]", depth)
|
self.emit("#[derive(Clone, Debug, PartialEq)]", depth)
|
||||||
|
|
||||||
def simple_sum(self, sum, name, depth):
|
def simple_sum(self, sum, name, depth):
|
||||||
rustname = get_rust_type(name)
|
rustname = get_rust_type(name)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,6 +1,6 @@
|
||||||
use num_bigint::BigInt;
|
use num_bigint::BigInt;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub enum Constant {
|
pub enum Constant {
|
||||||
None,
|
None,
|
||||||
Bool(bool),
|
Bool(bool),
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
use crate::constant;
|
use crate::constant;
|
||||||
use crate::fold::Fold;
|
use crate::fold::Fold;
|
||||||
|
use crate::StrRef;
|
||||||
|
|
||||||
pub(crate) trait Foldable<T, U> {
|
pub(crate) trait Foldable<T, U> {
|
||||||
type Mapped;
|
type Mapped;
|
||||||
|
@ -67,6 +68,7 @@ simple_fold!(
|
||||||
usize,
|
usize,
|
||||||
String,
|
String,
|
||||||
bool,
|
bool,
|
||||||
|
StrRef,
|
||||||
constant::Constant,
|
constant::Constant,
|
||||||
constant::ConversionFlag
|
constant::ConversionFlag
|
||||||
);
|
);
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
#[macro_use]
|
||||||
|
extern crate lazy_static;
|
||||||
|
|
||||||
mod ast_gen;
|
mod ast_gen;
|
||||||
mod constant;
|
mod constant;
|
||||||
#[cfg(feature = "fold")]
|
#[cfg(feature = "fold")]
|
||||||
|
|
|
@ -70,7 +70,7 @@ pub fn parse_args(func_args: Vec<FunctionArgument>) -> Result<ArgumentList, Lexi
|
||||||
keywords.push(ast::Keyword::new(
|
keywords.push(ast::Keyword::new(
|
||||||
location,
|
location,
|
||||||
ast::KeywordData {
|
ast::KeywordData {
|
||||||
arg: name,
|
arg: name.map(|name| name.into()),
|
||||||
value: Box::new(value),
|
value: Box::new(value),
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
|
|
|
@ -250,7 +250,7 @@ where
|
||||||
if let Some(tok) = KEYWORDS.get(name.as_str()) {
|
if let Some(tok) = KEYWORDS.get(name.as_str()) {
|
||||||
Ok((start_pos, tok.clone(), end_pos))
|
Ok((start_pos, tok.clone(), end_pos))
|
||||||
} else {
|
} else {
|
||||||
Ok((start_pos, Tok::Name { name }, end_pos))
|
Ok((start_pos, Tok::Name { name: name.into() }, end_pos))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -622,13 +622,18 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_identifier_start(&self, c: char) -> bool {
|
fn is_identifier_start(&self, c: char) -> bool {
|
||||||
c == '_' || is_xid_start(c)
|
match c {
|
||||||
|
'_' | 'a'..='z' | 'A'..='Z' => true,
|
||||||
|
'+' | '-' | '*' | '/' | '=' | ' ' | '<' | '>' => false,
|
||||||
|
c => is_xid_start(c),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_identifier_continuation(&self) -> bool {
|
fn is_identifier_continuation(&self) -> bool {
|
||||||
if let Some(c) = self.chr0 {
|
if let Some(c) = self.chr0 {
|
||||||
match c {
|
match c {
|
||||||
'_' | '0'..='9' => true,
|
'_' | '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
|
||||||
|
'+' | '-' | '*' | '/' | '=' | ' ' | '<' | '>' => false,
|
||||||
c => is_xid_continue(c),
|
c => is_xid_continue(c),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -784,9 +789,7 @@ where
|
||||||
let tok_end = self.get_pos();
|
let tok_end = self.get_pos();
|
||||||
self.emit((
|
self.emit((
|
||||||
tok_start,
|
tok_start,
|
||||||
Tok::Name {
|
Tok::Name { name: c.to_string().into() },
|
||||||
name: c.to_string(),
|
|
||||||
},
|
|
||||||
tok_end,
|
tok_end,
|
||||||
));
|
));
|
||||||
} else {
|
} else {
|
||||||
|
@ -1402,7 +1405,7 @@ mod tests {
|
||||||
tokens,
|
tokens,
|
||||||
vec![
|
vec![
|
||||||
Tok::Name {
|
Tok::Name {
|
||||||
name: String::from("avariable"),
|
name: String::from("avariable").into(),
|
||||||
},
|
},
|
||||||
Tok::Equal,
|
Tok::Equal,
|
||||||
Tok::Int {
|
Tok::Int {
|
||||||
|
@ -1433,7 +1436,7 @@ mod tests {
|
||||||
vec![
|
vec![
|
||||||
Tok::Def,
|
Tok::Def,
|
||||||
Tok::Name {
|
Tok::Name {
|
||||||
name: String::from("foo"),
|
name: String::from("foo").into(),
|
||||||
},
|
},
|
||||||
Tok::Lpar,
|
Tok::Lpar,
|
||||||
Tok::Rpar,
|
Tok::Rpar,
|
||||||
|
@ -1469,7 +1472,7 @@ mod tests {
|
||||||
vec![
|
vec![
|
||||||
Tok::Def,
|
Tok::Def,
|
||||||
Tok::Name {
|
Tok::Name {
|
||||||
name: String::from("foo"),
|
name: String::from("foo").into(),
|
||||||
},
|
},
|
||||||
Tok::Lpar,
|
Tok::Lpar,
|
||||||
Tok::Rpar,
|
Tok::Rpar,
|
||||||
|
@ -1478,7 +1481,7 @@ mod tests {
|
||||||
Tok::Indent,
|
Tok::Indent,
|
||||||
Tok::If,
|
Tok::If,
|
||||||
Tok::Name {
|
Tok::Name {
|
||||||
name: String::from("x"),
|
name: String::from("x").into(),
|
||||||
},
|
},
|
||||||
Tok::Colon,
|
Tok::Colon,
|
||||||
Tok::Newline,
|
Tok::Newline,
|
||||||
|
@ -1507,7 +1510,7 @@ mod tests {
|
||||||
vec![
|
vec![
|
||||||
Tok::Def,
|
Tok::Def,
|
||||||
Tok::Name {
|
Tok::Name {
|
||||||
name: String::from("foo"),
|
name: String::from("foo").into(),
|
||||||
},
|
},
|
||||||
Tok::Lpar,
|
Tok::Lpar,
|
||||||
Tok::Rpar,
|
Tok::Rpar,
|
||||||
|
@ -1516,7 +1519,7 @@ mod tests {
|
||||||
Tok::Indent,
|
Tok::Indent,
|
||||||
Tok::If,
|
Tok::If,
|
||||||
Tok::Name {
|
Tok::Name {
|
||||||
name: String::from("x"),
|
name: String::from("x").into(),
|
||||||
},
|
},
|
||||||
Tok::Colon,
|
Tok::Colon,
|
||||||
Tok::Newline,
|
Tok::Newline,
|
||||||
|
@ -1556,7 +1559,7 @@ mod tests {
|
||||||
tokens,
|
tokens,
|
||||||
vec![
|
vec![
|
||||||
Tok::Name {
|
Tok::Name {
|
||||||
name: String::from("x"),
|
name: String::from("x").into(),
|
||||||
},
|
},
|
||||||
Tok::Equal,
|
Tok::Equal,
|
||||||
Tok::Lsqb,
|
Tok::Lsqb,
|
||||||
|
|
|
@ -246,7 +246,7 @@ ImportStatement: ast::Stmt = {
|
||||||
location,
|
location,
|
||||||
node: ast::StmtKind::ImportFrom {
|
node: ast::StmtKind::ImportFrom {
|
||||||
level,
|
level,
|
||||||
module,
|
module: module.map(|s| s.into()),
|
||||||
names
|
names
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -272,24 +272,25 @@ ImportAsNames: Vec<ast::Alias> = {
|
||||||
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" => i,
|
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" => i,
|
||||||
"*" => {
|
"*" => {
|
||||||
// Star import all
|
// Star import all
|
||||||
vec![ast::Alias { name: "*".to_string(), asname: None }]
|
vec![ast::Alias { name: "*".into(), asname: None }]
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
ImportAsAlias<I>: ast::Alias = {
|
ImportAsAlias<I>: ast::Alias = {
|
||||||
<name:I> <a: ("as" Identifier)?> => ast::Alias { name, asname: a.map(|a| a.1) },
|
<name:I> <a: ("as" Identifier)?> => ast::Alias { name: name.into(), asname: a.map(|a| a.1) },
|
||||||
};
|
};
|
||||||
|
|
||||||
// A name like abc or abc.def.ghi
|
// A name like abc or abc.def.ghi
|
||||||
DottedName: String = {
|
DottedName: String = {
|
||||||
<n:name> => n,
|
<n:name> => n.into(),
|
||||||
<n:name> <n2: ("." Identifier)+> => {
|
<n:name> <n2: ("." Identifier)+> => {
|
||||||
let mut r = n.to_string();
|
let lock = ast::get_str_ref_lock();
|
||||||
|
let mut r = ast::get_str_from_ref(&lock, n).to_string();
|
||||||
for x in n2 {
|
for x in n2 {
|
||||||
r.push_str(".");
|
r.push_str(".");
|
||||||
r.push_str(&x.1);
|
r.push_str(&ast::get_str_from_ref(&lock, x.1));
|
||||||
}
|
}
|
||||||
r
|
r
|
||||||
},
|
},
|
||||||
|
@ -1157,7 +1158,7 @@ FunctionArgument: (Option<(ast::Location, Option<String>)>, ast::Expr) = {
|
||||||
};
|
};
|
||||||
(None, expr)
|
(None, expr)
|
||||||
},
|
},
|
||||||
<location:@L> <i:Identifier> "=" <e:Test> => (Some((location, Some(i))), e),
|
<location:@L> <i:Identifier> "=" <e:Test> => (Some((location, Some(i.into()))), e),
|
||||||
<location:@L> "*" <e:Test> => {
|
<location:@L> "*" <e:Test> => {
|
||||||
let expr = ast::Expr::new(
|
let expr = ast::Expr::new(
|
||||||
location,
|
location,
|
||||||
|
@ -1199,7 +1200,7 @@ Bytes: Vec<u8> = {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
Identifier: String = <s:name> => s;
|
Identifier: ast::StrRef = <s:name> => s;
|
||||||
|
|
||||||
// Hook external lexer:
|
// Hook external lexer:
|
||||||
extern {
|
extern {
|
||||||
|
@ -1299,7 +1300,7 @@ extern {
|
||||||
complex => lexer::Tok::Complex { real: <f64>, imag: <f64> },
|
complex => lexer::Tok::Complex { real: <f64>, imag: <f64> },
|
||||||
string => lexer::Tok::String { value: <String>, is_fstring: <bool> },
|
string => lexer::Tok::String { value: <String>, is_fstring: <bool> },
|
||||||
bytes => lexer::Tok::Bytes { value: <Vec<u8>> },
|
bytes => lexer::Tok::Bytes { value: <Vec<u8>> },
|
||||||
name => lexer::Tok::Name { name: <String> },
|
name => lexer::Tok::Name { name: <ast::StrRef> },
|
||||||
"\n" => lexer::Tok::Newline,
|
"\n" => lexer::Tok::Newline,
|
||||||
";" => lexer::Tok::Semi,
|
";" => lexer::Tok::Semi,
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,11 +2,12 @@
|
||||||
//! Loosely based on token.h from CPython source:
|
//! Loosely based on token.h from CPython source:
|
||||||
use num_bigint::BigInt;
|
use num_bigint::BigInt;
|
||||||
use std::fmt::{self, Write};
|
use std::fmt::{self, Write};
|
||||||
|
use crate::ast;
|
||||||
|
|
||||||
/// Python source code can be tokenized in a sequence of these tokens.
|
/// Python source code can be tokenized in a sequence of these tokens.
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub enum Tok {
|
pub enum Tok {
|
||||||
Name { name: String },
|
Name { name: ast::StrRef },
|
||||||
Int { value: BigInt },
|
Int { value: BigInt },
|
||||||
Float { value: f64 },
|
Float { value: f64 },
|
||||||
Complex { real: f64, imag: f64 },
|
Complex { real: f64, imag: f64 },
|
||||||
|
@ -110,7 +111,7 @@ impl fmt::Display for Tok {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
use Tok::*;
|
use Tok::*;
|
||||||
match self {
|
match self {
|
||||||
Name { name } => write!(f, "'{}'", name),
|
Name { name } => write!(f, "'{}'", ast::get_str_from_ref(&ast::get_str_ref_lock(), *name)),
|
||||||
Int { value } => write!(f, "'{}'", value),
|
Int { value } => write!(f, "'{}'", value),
|
||||||
Float { value } => write!(f, "'{}'", value),
|
Float { value } => write!(f, "'{}'", value),
|
||||||
Complex { real, imag } => write!(f, "{}j{}", real, imag),
|
Complex { real, imag } => write!(f, "{}j{}", real, imag),
|
||||||
|
|
Loading…
Reference in New Issue