rustpython-parser: string interner, optimizations, thread-local cache
Corresponds to the M-Labs RustPython fork at commit efdf7829ba1a5f87d30df8eaff12a330544f3cbd (branch parser-mod).
This commit is contained in:
parent
80c7bc1cbd
commit
48ce6bb6c5
@ -11,3 +11,7 @@ fold = []
|
||||
|
||||
[dependencies]
|
||||
num-bigint = "0.4.0"
|
||||
lazy_static = "1.4.0"
|
||||
parking_lot = "0.11.1"
|
||||
string-interner = "0.13.0"
|
||||
fxhash = "0.2.1"
|
||||
|
@ -185,7 +185,7 @@ class StructVisitor(TypeInfoEmitVisitor):
|
||||
self.sum_with_constructors(sum, name, depth)
|
||||
|
||||
def emit_attrs(self, depth):
|
||||
self.emit("#[derive(Debug, PartialEq)]", depth)
|
||||
self.emit("#[derive(Clone, Debug, PartialEq)]", depth)
|
||||
|
||||
def simple_sum(self, sum, name, depth):
|
||||
rustname = get_rust_type(name)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
use num_bigint::BigInt;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Constant {
|
||||
None,
|
||||
Bool(bool),
|
||||
|
@ -1,5 +1,6 @@
|
||||
use crate::constant;
|
||||
use crate::fold::Fold;
|
||||
use crate::StrRef;
|
||||
|
||||
pub(crate) trait Foldable<T, U> {
|
||||
type Mapped;
|
||||
@ -67,6 +68,7 @@ simple_fold!(
|
||||
usize,
|
||||
String,
|
||||
bool,
|
||||
StrRef,
|
||||
constant::Constant,
|
||||
constant::ConversionFlag
|
||||
);
|
||||
|
@ -1,3 +1,6 @@
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
mod ast_gen;
|
||||
mod constant;
|
||||
#[cfg(feature = "fold")]
|
||||
|
@ -70,7 +70,7 @@ pub fn parse_args(func_args: Vec<FunctionArgument>) -> Result<ArgumentList, Lexi
|
||||
keywords.push(ast::Keyword::new(
|
||||
location,
|
||||
ast::KeywordData {
|
||||
arg: name,
|
||||
arg: name.map(|name| name.into()),
|
||||
value: Box::new(value),
|
||||
},
|
||||
));
|
||||
|
@ -250,7 +250,7 @@ where
|
||||
if let Some(tok) = KEYWORDS.get(name.as_str()) {
|
||||
Ok((start_pos, tok.clone(), end_pos))
|
||||
} else {
|
||||
Ok((start_pos, Tok::Name { name }, end_pos))
|
||||
Ok((start_pos, Tok::Name { name: name.into() }, end_pos))
|
||||
}
|
||||
}
|
||||
|
||||
@ -622,13 +622,18 @@ where
|
||||
}
|
||||
|
||||
fn is_identifier_start(&self, c: char) -> bool {
|
||||
c == '_' || is_xid_start(c)
|
||||
match c {
|
||||
'_' | 'a'..='z' | 'A'..='Z' => true,
|
||||
'+' | '-' | '*' | '/' | '=' | ' ' | '<' | '>' => false,
|
||||
c => is_xid_start(c),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_identifier_continuation(&self) -> bool {
|
||||
if let Some(c) = self.chr0 {
|
||||
match c {
|
||||
'_' | '0'..='9' => true,
|
||||
'_' | '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
|
||||
'+' | '-' | '*' | '/' | '=' | ' ' | '<' | '>' => false,
|
||||
c => is_xid_continue(c),
|
||||
}
|
||||
} else {
|
||||
@ -784,9 +789,7 @@ where
|
||||
let tok_end = self.get_pos();
|
||||
self.emit((
|
||||
tok_start,
|
||||
Tok::Name {
|
||||
name: c.to_string(),
|
||||
},
|
||||
Tok::Name { name: c.to_string().into() },
|
||||
tok_end,
|
||||
));
|
||||
} else {
|
||||
@ -1402,7 +1405,7 @@ mod tests {
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Name {
|
||||
name: String::from("avariable"),
|
||||
name: String::from("avariable").into(),
|
||||
},
|
||||
Tok::Equal,
|
||||
Tok::Int {
|
||||
@ -1433,7 +1436,7 @@ mod tests {
|
||||
vec![
|
||||
Tok::Def,
|
||||
Tok::Name {
|
||||
name: String::from("foo"),
|
||||
name: String::from("foo").into(),
|
||||
},
|
||||
Tok::Lpar,
|
||||
Tok::Rpar,
|
||||
@ -1469,7 +1472,7 @@ mod tests {
|
||||
vec![
|
||||
Tok::Def,
|
||||
Tok::Name {
|
||||
name: String::from("foo"),
|
||||
name: String::from("foo").into(),
|
||||
},
|
||||
Tok::Lpar,
|
||||
Tok::Rpar,
|
||||
@ -1478,7 +1481,7 @@ mod tests {
|
||||
Tok::Indent,
|
||||
Tok::If,
|
||||
Tok::Name {
|
||||
name: String::from("x"),
|
||||
name: String::from("x").into(),
|
||||
},
|
||||
Tok::Colon,
|
||||
Tok::Newline,
|
||||
@ -1507,7 +1510,7 @@ mod tests {
|
||||
vec![
|
||||
Tok::Def,
|
||||
Tok::Name {
|
||||
name: String::from("foo"),
|
||||
name: String::from("foo").into(),
|
||||
},
|
||||
Tok::Lpar,
|
||||
Tok::Rpar,
|
||||
@ -1516,7 +1519,7 @@ mod tests {
|
||||
Tok::Indent,
|
||||
Tok::If,
|
||||
Tok::Name {
|
||||
name: String::from("x"),
|
||||
name: String::from("x").into(),
|
||||
},
|
||||
Tok::Colon,
|
||||
Tok::Newline,
|
||||
@ -1556,7 +1559,7 @@ mod tests {
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Name {
|
||||
name: String::from("x"),
|
||||
name: String::from("x").into(),
|
||||
},
|
||||
Tok::Equal,
|
||||
Tok::Lsqb,
|
||||
|
@ -246,7 +246,7 @@ ImportStatement: ast::Stmt = {
|
||||
location,
|
||||
node: ast::StmtKind::ImportFrom {
|
||||
level,
|
||||
module,
|
||||
module: module.map(|s| s.into()),
|
||||
names
|
||||
},
|
||||
}
|
||||
@ -272,24 +272,25 @@ ImportAsNames: Vec<ast::Alias> = {
|
||||
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" => i,
|
||||
"*" => {
|
||||
// Star import all
|
||||
vec![ast::Alias { name: "*".to_string(), asname: None }]
|
||||
vec![ast::Alias { name: "*".into(), asname: None }]
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
#[inline]
|
||||
ImportAsAlias<I>: ast::Alias = {
|
||||
<name:I> <a: ("as" Identifier)?> => ast::Alias { name, asname: a.map(|a| a.1) },
|
||||
<name:I> <a: ("as" Identifier)?> => ast::Alias { name: name.into(), asname: a.map(|a| a.1) },
|
||||
};
|
||||
|
||||
// A name like abc or abc.def.ghi
|
||||
DottedName: String = {
|
||||
<n:name> => n,
|
||||
<n:name> => n.into(),
|
||||
<n:name> <n2: ("." Identifier)+> => {
|
||||
let mut r = n.to_string();
|
||||
let lock = ast::get_str_ref_lock();
|
||||
let mut r = ast::get_str_from_ref(&lock, n).to_string();
|
||||
for x in n2 {
|
||||
r.push_str(".");
|
||||
r.push_str(&x.1);
|
||||
r.push_str(&ast::get_str_from_ref(&lock, x.1));
|
||||
}
|
||||
r
|
||||
},
|
||||
@ -1157,7 +1158,7 @@ FunctionArgument: (Option<(ast::Location, Option<String>)>, ast::Expr) = {
|
||||
};
|
||||
(None, expr)
|
||||
},
|
||||
<location:@L> <i:Identifier> "=" <e:Test> => (Some((location, Some(i))), e),
|
||||
<location:@L> <i:Identifier> "=" <e:Test> => (Some((location, Some(i.into()))), e),
|
||||
<location:@L> "*" <e:Test> => {
|
||||
let expr = ast::Expr::new(
|
||||
location,
|
||||
@ -1199,7 +1200,7 @@ Bytes: Vec<u8> = {
|
||||
},
|
||||
};
|
||||
|
||||
Identifier: String = <s:name> => s;
|
||||
Identifier: ast::StrRef = <s:name> => s;
|
||||
|
||||
// Hook external lexer:
|
||||
extern {
|
||||
@ -1299,7 +1300,7 @@ extern {
|
||||
complex => lexer::Tok::Complex { real: <f64>, imag: <f64> },
|
||||
string => lexer::Tok::String { value: <String>, is_fstring: <bool> },
|
||||
bytes => lexer::Tok::Bytes { value: <Vec<u8>> },
|
||||
name => lexer::Tok::Name { name: <String> },
|
||||
name => lexer::Tok::Name { name: <ast::StrRef> },
|
||||
"\n" => lexer::Tok::Newline,
|
||||
";" => lexer::Tok::Semi,
|
||||
}
|
||||
|
@ -2,11 +2,12 @@
|
||||
//! Loosely based on token.h from CPython source:
|
||||
use num_bigint::BigInt;
|
||||
use std::fmt::{self, Write};
|
||||
use crate::ast;
|
||||
|
||||
/// Python source code can be tokenized in a sequence of these tokens.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Tok {
|
||||
Name { name: String },
|
||||
Name { name: ast::StrRef },
|
||||
Int { value: BigInt },
|
||||
Float { value: f64 },
|
||||
Complex { real: f64, imag: f64 },
|
||||
@ -110,7 +111,7 @@ impl fmt::Display for Tok {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use Tok::*;
|
||||
match self {
|
||||
Name { name } => write!(f, "'{}'", name),
|
||||
Name { name } => write!(f, "'{}'", ast::get_str_from_ref(&ast::get_str_ref_lock(), *name)),
|
||||
Int { value } => write!(f, "'{}'", value),
|
||||
Float { value } => write!(f, "'{}'", value),
|
||||
Complex { real, imag } => write!(f, "{}j{}", real, imag),
|
||||
|
Loading…
Reference in New Issue
Block a user