rustpython-parser: string interner, optimizations, thread local cache

corresponds to M-Labs RustPython fork at efdf7829ba1a5f87d30df8eaff12a330544f3cbd
branch parser-mod
This commit is contained in:
pca006132 2021-11-03 16:35:16 +08:00 committed by Sebastien Bourdeauducq
parent 80c7bc1cbd
commit 48ce6bb6c5
10 changed files with 737 additions and 647 deletions

View File

@ -11,3 +11,7 @@ fold = []
[dependencies]
num-bigint = "0.4.0"
lazy_static = "1.4.0"
parking_lot = "0.11.1"
string-interner = "0.13.0"
fxhash = "0.2.1"

View File

@ -185,7 +185,7 @@ class StructVisitor(TypeInfoEmitVisitor):
self.sum_with_constructors(sum, name, depth)
def emit_attrs(self, depth):
    # Emit the single derive attribute shared by all generated AST types.
    # Clone is required so AST nodes can be duplicated by downstream passes;
    # the stale pre-change emit of "#[derive(Debug, PartialEq)]" (leftover
    # removed diff line) is dropped — emitting both would attach two
    # conflicting derive attributes to every generated type.
    self.emit("#[derive(Clone, Debug, PartialEq)]", depth)
def simple_sum(self, sum, name, depth):
rustname = get_rust_type(name)

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
use num_bigint::BigInt;
#[derive(Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq)]
pub enum Constant {
None,
Bool(bool),

View File

@ -1,5 +1,6 @@
use crate::constant;
use crate::fold::Fold;
use crate::StrRef;
pub(crate) trait Foldable<T, U> {
type Mapped;
@ -67,6 +68,7 @@ simple_fold!(
usize,
String,
bool,
StrRef,
constant::Constant,
constant::ConversionFlag
);

View File

@ -1,3 +1,6 @@
#[macro_use]
extern crate lazy_static;
mod ast_gen;
mod constant;
#[cfg(feature = "fold")]

View File

@ -70,7 +70,7 @@ pub fn parse_args(func_args: Vec<FunctionArgument>) -> Result<ArgumentList, Lexi
keywords.push(ast::Keyword::new(
location,
ast::KeywordData {
arg: name,
arg: name.map(|name| name.into()),
value: Box::new(value),
},
));

View File

@ -250,7 +250,7 @@ where
if let Some(tok) = KEYWORDS.get(name.as_str()) {
Ok((start_pos, tok.clone(), end_pos))
} else {
Ok((start_pos, Tok::Name { name }, end_pos))
Ok((start_pos, Tok::Name { name: name.into() }, end_pos))
}
}
@ -622,13 +622,18 @@ where
}
fn is_identifier_start(&self, c: char) -> bool {
    // Decide whether `c` may begin an identifier.
    // Fast paths first: ASCII letters and '_' always start identifiers,
    // and common operator/space characters never do, so the (slower)
    // Unicode XID_Start table lookup only runs for the rare remainder.
    // The stale pre-change expression `c == '_' || is_xid_start(c)`
    // (leftover removed diff line) is dropped — two trailing expressions
    // in one block do not compile.
    match c {
        '_' | 'a'..='z' | 'A'..='Z' => true,
        '+' | '-' | '*' | '/' | '=' | ' ' | '<' | '>' => false,
        // NOTE(review): assumes `is_xid_start` is the unicode-xid crate's
        // XID_Start predicate — confirm against the file's imports.
        c => is_xid_start(c),
    }
}
fn is_identifier_continuation(&self) -> bool {
if let Some(c) = self.chr0 {
match c {
'_' | '0'..='9' => true,
'_' | '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
'+' | '-' | '*' | '/' | '=' | ' ' | '<' | '>' => false,
c => is_xid_continue(c),
}
} else {
@ -784,9 +789,7 @@ where
let tok_end = self.get_pos();
self.emit((
tok_start,
Tok::Name {
name: c.to_string(),
},
Tok::Name { name: c.to_string().into() },
tok_end,
));
} else {
@ -1402,7 +1405,7 @@ mod tests {
tokens,
vec![
Tok::Name {
name: String::from("avariable"),
name: String::from("avariable").into(),
},
Tok::Equal,
Tok::Int {
@ -1433,7 +1436,7 @@ mod tests {
vec![
Tok::Def,
Tok::Name {
name: String::from("foo"),
name: String::from("foo").into(),
},
Tok::Lpar,
Tok::Rpar,
@ -1469,7 +1472,7 @@ mod tests {
vec![
Tok::Def,
Tok::Name {
name: String::from("foo"),
name: String::from("foo").into(),
},
Tok::Lpar,
Tok::Rpar,
@ -1478,7 +1481,7 @@ mod tests {
Tok::Indent,
Tok::If,
Tok::Name {
name: String::from("x"),
name: String::from("x").into(),
},
Tok::Colon,
Tok::Newline,
@ -1507,7 +1510,7 @@ mod tests {
vec![
Tok::Def,
Tok::Name {
name: String::from("foo"),
name: String::from("foo").into(),
},
Tok::Lpar,
Tok::Rpar,
@ -1516,7 +1519,7 @@ mod tests {
Tok::Indent,
Tok::If,
Tok::Name {
name: String::from("x"),
name: String::from("x").into(),
},
Tok::Colon,
Tok::Newline,
@ -1556,7 +1559,7 @@ mod tests {
tokens,
vec![
Tok::Name {
name: String::from("x"),
name: String::from("x").into(),
},
Tok::Equal,
Tok::Lsqb,

View File

@ -246,7 +246,7 @@ ImportStatement: ast::Stmt = {
location,
node: ast::StmtKind::ImportFrom {
level,
module,
module: module.map(|s| s.into()),
names
},
}
@ -272,24 +272,25 @@ ImportAsNames: Vec<ast::Alias> = {
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" => i,
"*" => {
// Star import all
vec![ast::Alias { name: "*".to_string(), asname: None }]
vec![ast::Alias { name: "*".into(), asname: None }]
},
};
#[inline]
ImportAsAlias<I>: ast::Alias = {
<name:I> <a: ("as" Identifier)?> => ast::Alias { name, asname: a.map(|a| a.1) },
<name:I> <a: ("as" Identifier)?> => ast::Alias { name: name.into(), asname: a.map(|a| a.1) },
};
// A name like abc or abc.def.ghi
DottedName: String = {
<n:name> => n,
<n:name> => n.into(),
<n:name> <n2: ("." Identifier)+> => {
let mut r = n.to_string();
let lock = ast::get_str_ref_lock();
let mut r = ast::get_str_from_ref(&lock, n).to_string();
for x in n2 {
r.push_str(".");
r.push_str(&x.1);
r.push_str(&ast::get_str_from_ref(&lock, x.1));
}
r
},
@ -1157,7 +1158,7 @@ FunctionArgument: (Option<(ast::Location, Option<String>)>, ast::Expr) = {
};
(None, expr)
},
<location:@L> <i:Identifier> "=" <e:Test> => (Some((location, Some(i))), e),
<location:@L> <i:Identifier> "=" <e:Test> => (Some((location, Some(i.into()))), e),
<location:@L> "*" <e:Test> => {
let expr = ast::Expr::new(
location,
@ -1199,7 +1200,7 @@ Bytes: Vec<u8> = {
},
};
Identifier: String = <s:name> => s;
Identifier: ast::StrRef = <s:name> => s;
// Hook external lexer:
extern {
@ -1299,7 +1300,7 @@ extern {
complex => lexer::Tok::Complex { real: <f64>, imag: <f64> },
string => lexer::Tok::String { value: <String>, is_fstring: <bool> },
bytes => lexer::Tok::Bytes { value: <Vec<u8>> },
name => lexer::Tok::Name { name: <String> },
name => lexer::Tok::Name { name: <ast::StrRef> },
"\n" => lexer::Tok::Newline,
";" => lexer::Tok::Semi,
}

View File

@ -2,11 +2,12 @@
//! Loosely based on token.h from CPython source:
use num_bigint::BigInt;
use std::fmt::{self, Write};
use crate::ast;
/// Python source code can be tokenized in a sequence of these tokens.
#[derive(Clone, Debug, PartialEq)]
pub enum Tok {
Name { name: String },
Name { name: ast::StrRef },
Int { value: BigInt },
Float { value: f64 },
Complex { real: f64, imag: f64 },
@ -110,7 +111,7 @@ impl fmt::Display for Tok {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use Tok::*;
match self {
Name { name } => write!(f, "'{}'", name),
Name { name } => write!(f, "'{}'", ast::get_str_from_ref(&ast::get_str_ref_lock(), *name)),
Int { value } => write!(f, "'{}'", value),
Float { value } => write!(f, "'{}'", value),
Complex { real, imag } => write!(f, "{}j{}", real, imag),