rustpython-parser: string interner, optimizations, thread local cache

corresponds to M-Labs RustPython fork at efdf7829ba1a5f87d30df8eaff12a330544f3cbd
branch parser-mod
This commit is contained in:
pca006132 2021-11-03 16:35:16 +08:00 committed by Sebastien Bourdeauducq
parent 80c7bc1cbd
commit 48ce6bb6c5
10 changed files with 737 additions and 647 deletions

View File

@ -11,3 +11,7 @@ fold = []
[dependencies] [dependencies]
num-bigint = "0.4.0" num-bigint = "0.4.0"
lazy_static = "1.4.0"
parking_lot = "0.11.1"
string-interner = "0.13.0"
fxhash = "0.2.1"

View File

@ -185,7 +185,7 @@ class StructVisitor(TypeInfoEmitVisitor):
self.sum_with_constructors(sum, name, depth) self.sum_with_constructors(sum, name, depth)
def emit_attrs(self, depth): def emit_attrs(self, depth):
self.emit("#[derive(Debug, PartialEq)]", depth) self.emit("#[derive(Clone, Debug, PartialEq)]", depth)
def simple_sum(self, sum, name, depth): def simple_sum(self, sum, name, depth):
rustname = get_rust_type(name) rustname = get_rust_type(name)

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
use num_bigint::BigInt; use num_bigint::BigInt;
#[derive(Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Constant { pub enum Constant {
None, None,
Bool(bool), Bool(bool),

View File

@ -1,5 +1,6 @@
use crate::constant; use crate::constant;
use crate::fold::Fold; use crate::fold::Fold;
use crate::StrRef;
pub(crate) trait Foldable<T, U> { pub(crate) trait Foldable<T, U> {
type Mapped; type Mapped;
@ -67,6 +68,7 @@ simple_fold!(
usize, usize,
String, String,
bool, bool,
StrRef,
constant::Constant, constant::Constant,
constant::ConversionFlag constant::ConversionFlag
); );

View File

@ -1,3 +1,6 @@
#[macro_use]
extern crate lazy_static;
mod ast_gen; mod ast_gen;
mod constant; mod constant;
#[cfg(feature = "fold")] #[cfg(feature = "fold")]

View File

@ -70,7 +70,7 @@ pub fn parse_args(func_args: Vec<FunctionArgument>) -> Result<ArgumentList, Lexi
keywords.push(ast::Keyword::new( keywords.push(ast::Keyword::new(
location, location,
ast::KeywordData { ast::KeywordData {
arg: name, arg: name.map(|name| name.into()),
value: Box::new(value), value: Box::new(value),
}, },
)); ));

View File

@ -250,7 +250,7 @@ where
if let Some(tok) = KEYWORDS.get(name.as_str()) { if let Some(tok) = KEYWORDS.get(name.as_str()) {
Ok((start_pos, tok.clone(), end_pos)) Ok((start_pos, tok.clone(), end_pos))
} else { } else {
Ok((start_pos, Tok::Name { name }, end_pos)) Ok((start_pos, Tok::Name { name: name.into() }, end_pos))
} }
} }
@ -622,13 +622,18 @@ where
} }
fn is_identifier_start(&self, c: char) -> bool { fn is_identifier_start(&self, c: char) -> bool {
c == '_' || is_xid_start(c) match c {
'_' | 'a'..='z' | 'A'..='Z' => true,
'+' | '-' | '*' | '/' | '=' | ' ' | '<' | '>' => false,
c => is_xid_start(c),
}
} }
fn is_identifier_continuation(&self) -> bool { fn is_identifier_continuation(&self) -> bool {
if let Some(c) = self.chr0 { if let Some(c) = self.chr0 {
match c { match c {
'_' | '0'..='9' => true, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
'+' | '-' | '*' | '/' | '=' | ' ' | '<' | '>' => false,
c => is_xid_continue(c), c => is_xid_continue(c),
} }
} else { } else {
@ -784,9 +789,7 @@ where
let tok_end = self.get_pos(); let tok_end = self.get_pos();
self.emit(( self.emit((
tok_start, tok_start,
Tok::Name { Tok::Name { name: c.to_string().into() },
name: c.to_string(),
},
tok_end, tok_end,
)); ));
} else { } else {
@ -1402,7 +1405,7 @@ mod tests {
tokens, tokens,
vec![ vec![
Tok::Name { Tok::Name {
name: String::from("avariable"), name: String::from("avariable").into(),
}, },
Tok::Equal, Tok::Equal,
Tok::Int { Tok::Int {
@ -1433,7 +1436,7 @@ mod tests {
vec![ vec![
Tok::Def, Tok::Def,
Tok::Name { Tok::Name {
name: String::from("foo"), name: String::from("foo").into(),
}, },
Tok::Lpar, Tok::Lpar,
Tok::Rpar, Tok::Rpar,
@ -1469,7 +1472,7 @@ mod tests {
vec![ vec![
Tok::Def, Tok::Def,
Tok::Name { Tok::Name {
name: String::from("foo"), name: String::from("foo").into(),
}, },
Tok::Lpar, Tok::Lpar,
Tok::Rpar, Tok::Rpar,
@ -1478,7 +1481,7 @@ mod tests {
Tok::Indent, Tok::Indent,
Tok::If, Tok::If,
Tok::Name { Tok::Name {
name: String::from("x"), name: String::from("x").into(),
}, },
Tok::Colon, Tok::Colon,
Tok::Newline, Tok::Newline,
@ -1507,7 +1510,7 @@ mod tests {
vec![ vec![
Tok::Def, Tok::Def,
Tok::Name { Tok::Name {
name: String::from("foo"), name: String::from("foo").into(),
}, },
Tok::Lpar, Tok::Lpar,
Tok::Rpar, Tok::Rpar,
@ -1516,7 +1519,7 @@ mod tests {
Tok::Indent, Tok::Indent,
Tok::If, Tok::If,
Tok::Name { Tok::Name {
name: String::from("x"), name: String::from("x").into(),
}, },
Tok::Colon, Tok::Colon,
Tok::Newline, Tok::Newline,
@ -1556,7 +1559,7 @@ mod tests {
tokens, tokens,
vec![ vec![
Tok::Name { Tok::Name {
name: String::from("x"), name: String::from("x").into(),
}, },
Tok::Equal, Tok::Equal,
Tok::Lsqb, Tok::Lsqb,

View File

@ -246,7 +246,7 @@ ImportStatement: ast::Stmt = {
location, location,
node: ast::StmtKind::ImportFrom { node: ast::StmtKind::ImportFrom {
level, level,
module, module: module.map(|s| s.into()),
names names
}, },
} }
@ -272,24 +272,25 @@ ImportAsNames: Vec<ast::Alias> = {
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" => i, "(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" => i,
"*" => { "*" => {
// Star import all // Star import all
vec![ast::Alias { name: "*".to_string(), asname: None }] vec![ast::Alias { name: "*".into(), asname: None }]
}, },
}; };
#[inline] #[inline]
ImportAsAlias<I>: ast::Alias = { ImportAsAlias<I>: ast::Alias = {
<name:I> <a: ("as" Identifier)?> => ast::Alias { name, asname: a.map(|a| a.1) }, <name:I> <a: ("as" Identifier)?> => ast::Alias { name: name.into(), asname: a.map(|a| a.1) },
}; };
// A name like abc or abc.def.ghi // A name like abc or abc.def.ghi
DottedName: String = { DottedName: String = {
<n:name> => n, <n:name> => n.into(),
<n:name> <n2: ("." Identifier)+> => { <n:name> <n2: ("." Identifier)+> => {
let mut r = n.to_string(); let lock = ast::get_str_ref_lock();
let mut r = ast::get_str_from_ref(&lock, n).to_string();
for x in n2 { for x in n2 {
r.push_str("."); r.push_str(".");
r.push_str(&x.1); r.push_str(&ast::get_str_from_ref(&lock, x.1));
} }
r r
}, },
@ -1157,7 +1158,7 @@ FunctionArgument: (Option<(ast::Location, Option<String>)>, ast::Expr) = {
}; };
(None, expr) (None, expr)
}, },
<location:@L> <i:Identifier> "=" <e:Test> => (Some((location, Some(i))), e), <location:@L> <i:Identifier> "=" <e:Test> => (Some((location, Some(i.into()))), e),
<location:@L> "*" <e:Test> => { <location:@L> "*" <e:Test> => {
let expr = ast::Expr::new( let expr = ast::Expr::new(
location, location,
@ -1199,7 +1200,7 @@ Bytes: Vec<u8> = {
}, },
}; };
Identifier: String = <s:name> => s; Identifier: ast::StrRef = <s:name> => s;
// Hook external lexer: // Hook external lexer:
extern { extern {
@ -1299,7 +1300,7 @@ extern {
complex => lexer::Tok::Complex { real: <f64>, imag: <f64> }, complex => lexer::Tok::Complex { real: <f64>, imag: <f64> },
string => lexer::Tok::String { value: <String>, is_fstring: <bool> }, string => lexer::Tok::String { value: <String>, is_fstring: <bool> },
bytes => lexer::Tok::Bytes { value: <Vec<u8>> }, bytes => lexer::Tok::Bytes { value: <Vec<u8>> },
name => lexer::Tok::Name { name: <String> }, name => lexer::Tok::Name { name: <ast::StrRef> },
"\n" => lexer::Tok::Newline, "\n" => lexer::Tok::Newline,
";" => lexer::Tok::Semi, ";" => lexer::Tok::Semi,
} }

View File

@ -2,11 +2,12 @@
//! Loosely based on token.h from CPython source: //! Loosely based on token.h from CPython source:
use num_bigint::BigInt; use num_bigint::BigInt;
use std::fmt::{self, Write}; use std::fmt::{self, Write};
use crate::ast;
/// Python source code can be tokenized in a sequence of these tokens. /// Python source code can be tokenized in a sequence of these tokens.
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Tok { pub enum Tok {
Name { name: String }, Name { name: ast::StrRef },
Int { value: BigInt }, Int { value: BigInt },
Float { value: f64 }, Float { value: f64 },
Complex { real: f64, imag: f64 }, Complex { real: f64, imag: f64 },
@ -110,7 +111,7 @@ impl fmt::Display for Tok {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use Tok::*; use Tok::*;
match self { match self {
Name { name } => write!(f, "'{}'", name), Name { name } => write!(f, "'{}'", ast::get_str_from_ref(&ast::get_str_ref_lock(), *name)),
Int { value } => write!(f, "'{}'", value), Int { value } => write!(f, "'{}'", value),
Float { value } => write!(f, "'{}'", value), Float { value } => write!(f, "'{}'", value),
Complex { real, imag } => write!(f, "{}j{}", real, imag), Complex { real, imag } => write!(f, "{}j{}", real, imag),