From 864be73e77e8b58e5fdc00ac8da883c09f6f88ba Mon Sep 17 00:00:00 2001 From: imxyy_soope_ Date: Thu, 15 May 2025 18:19:16 +0800 Subject: [PATCH] feat: less clone on symbol --- src/builtins/mod.rs | 17 ++++----- src/bytecode.rs | 21 ++++++++--- src/compile.rs | 45 +++++------------------ src/ir.rs | 59 +++++++++++++++++++----------- src/ty/common.rs | 49 +------------------------ src/ty/internal/attrset.rs | 30 +++++++-------- src/ty/internal/func.rs | 11 +++--- src/ty/internal/mod.rs | 10 ++--- src/ty/public/mod.rs | 48 ++++++++++++++++++++++++ src/vm/env.rs | 11 +++--- src/vm/mod.rs | 75 +++++++++++++++++++++++--------------- src/vm/test.rs | 2 +- 12 files changed, 195 insertions(+), 183 deletions(-) diff --git a/src/builtins/mod.rs b/src/builtins/mod.rs index c26c1c8..06bc924 100644 --- a/src/builtins/mod.rs +++ b/src/builtins/mod.rs @@ -1,14 +1,13 @@ use std::cell::RefCell; use std::rc::Rc; -use crate::ty::common::Symbol; use crate::ty::internal::{_Thunk, Const, PrimOp, RecAttrSet, Thunk, Value}; -use crate::vm::Env; +use crate::vm::{Env, VM}; -pub fn env<'vm>() -> Rc> { +pub fn env<'vm>(vm: &'vm VM) -> Rc> { let env = Rc::new(Env::empty()); - env.insert(Symbol::from("true"), Value::Const(Const::Bool(true))); - env.insert(Symbol::from("false"), Value::Const(Const::Bool(false))); + env.insert(vm.new_sym("true"), Value::Const(Const::Bool(true))); + env.insert(vm.new_sym("false"), Value::Const(Const::Bool(false))); let primops = [ PrimOp::new("add", 2, |_, args| { @@ -47,17 +46,17 @@ pub fn env<'vm>() -> Rc> { let map = builtins_env.clone().new_rec(); for primop in primops { env.insert( - Symbol::from(format!("__{}", primop.name)), + vm.new_sym(format!("__{}", primop.name)), Value::PrimOp(primop.clone()), ); - map.insert(Symbol::from(primop.name), Value::PrimOp(primop)); + map.insert(vm.new_sym(primop.name), Value::PrimOp(primop)); } let builtins = Value::RecAttrSet(RecAttrSet::from_inner(map.clone())); let thunk = Thunk { thunk: Rc::new(RefCell::new(_Thunk::Value(Box::new(builtins.clone())))), }; - map.insert(Symbol::from("builtins"), Value::Thunk(thunk)); + map.insert(vm.new_sym("builtins"), Value::Thunk(thunk)); - env.insert(Symbol::from("builtins"), builtins); + env.insert(vm.new_sym("builtins"), builtins); env } diff --git a/src/bytecode.rs b/src/bytecode.rs index 51e904b..90e7991 100644 --- a/src/bytecode.rs +++ b/src/bytecode.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use ecow::EcoString; use crate::ty::internal::{Const, Param}; @@ -11,10 +13,10 @@ pub enum OpCode { /// load a constant onto stack Const { value: Const }, /// load a dynamic var onto stack - LookUp { sym: EcoString }, + LookUp { sym: usize }, /// load a thunk lazily onto stack LoadThunk { idx: usize }, - /// TODO: + /// let TOS capture current environment CaptureEnv, /// force TOS to value ForceValue, @@ -40,7 +42,7 @@ pub enum OpCode { /// finalize the recursive attribute set at TOS FinalizeRec, /// [ .. set value ] consume 1 element, push a static kv pair (`name`, `value`) into `set` - PushStaticAttr { name: EcoString }, + PushStaticAttr { name: usize }, /// [ .. set name value ] consume 2 elements, push a dynamic kv pair (`name`, `value`) in to `set` PushDynamicAttr, @@ -59,13 +61,13 @@ pub enum OpCode { /// [ .. a ] consume 1 element, perform a unary operation `op` `a` UnOp { op: UnOp }, /// set TOS to the bool value of whether TOS contains `sym` - HasAttr { sym: EcoString }, + HasAttr { sym: usize }, /// [ .. set sym ] consume 2 elements, set TOS to the bool value of whether `set` contains `sym` HasDynamicAttr, /// [ .. set ] select `sym` from `set` - Select { sym: EcoString }, + Select { sym: usize }, /// [ .. set default ] select `sym` from `set` or `default` - SelectOrDefault { sym: EcoString }, + SelectOrDefault { sym: usize }, /// [ .. set sym ] select `sym` from `set` SelectDynamic, /// [ .. set sym default ] select `sym` from `set` or `default` @@ -82,15 +84,20 @@ pub enum OpCode { #[derive(Debug, Clone, Copy)] pub enum BinOp { Add, + Sub, + Mul, + Div, And, Or, Eq, + Lt, Con, Upd, } #[derive(Debug, Clone, Copy)] pub enum UnOp { + Neg, Not, } @@ -105,4 +112,6 @@ pub struct Program { pub top_level: OpCodes, pub thunks: Slice, pub funcs: Slice, + pub symbols: Vec, + pub symmap: HashMap, } diff --git a/src/compile.rs b/src/compile.rs index eea5c15..e7afaca 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -22,6 +22,8 @@ pub fn compile(downgraded: ir::Downgraded) -> Program { opcodes: Compiler::new().compile(*func.body), }) .collect(), + symbols: downgraded.symbols, + symmap: downgraded.symmap } } @@ -132,14 +134,8 @@ impl Compile for ir::UnOp { use ir::UnOpKind::*; match self.kind { Neg => { - comp.push(OpCode::LookUp { - sym: "__sub".into(), - }); - comp.push(OpCode::Const { - value: Const::Int(0), - }); self.rhs.compile(comp); - comp.push(OpCode::Call { arity: 2 }); + comp.push(OpCode::UnOp { op: UnOp::Neg }); } Not => { self.rhs.compile(comp); @@ -159,20 +155,14 @@ impl Compile for ir::BinOp { comp.push(OpCode::BinOp { op: BinOp::Add }); } Mul => { - comp.push(OpCode::LookUp { - sym: "__mul".into(), - }); self.lhs.compile(comp); self.rhs.compile(comp); - comp.push(OpCode::Call { arity: 2 }); + comp.push(OpCode::BinOp { op: BinOp::Mul }); } Div => { - comp.push(OpCode::LookUp { - sym: "__div".into(), - }); self.lhs.compile(comp); self.rhs.compile(comp); - comp.push(OpCode::Call { arity: 2 }); + comp.push(OpCode::BinOp { op: BinOp::Div }); } And => { self.lhs.compile(comp); @@ -190,12 +180,9 @@ impl Compile for ir::BinOp { comp.push(OpCode::BinOp { op: BinOp::Eq }); } Lt => { - comp.push(OpCode::LookUp { - sym: "__lessThan".into(), - }); self.lhs.compile(comp); self.rhs.compile(comp); - comp.push(OpCode::Call { arity: 2 }); + comp.push(OpCode::BinOp { op: BinOp::Lt }); } Con => { self.lhs.compile(comp); @@ -209,12 +196,9 @@ impl Compile for ir::BinOp { } Sub => { - comp.push(OpCode::LookUp { - sym: "__sub".into(), - }); self.lhs.compile(comp); self.rhs.compile(comp); - comp.push(OpCode::Call { arity: 2 }); + comp.push(OpCode::BinOp { op: BinOp::Sub }); } Impl => { self.lhs.compile(comp); @@ -229,29 +213,20 @@ impl Compile for ir::BinOp { comp.push(OpCode::UnOp { op: UnOp::Not }); } Gt => { - comp.push(OpCode::LookUp { - sym: "__lessThan".into(), - }); self.rhs.compile(comp); self.lhs.compile(comp); - comp.push(OpCode::Call { arity: 2 }); + comp.push(OpCode::BinOp { op: BinOp::Lt }); } Leq => { - comp.push(OpCode::LookUp { - sym: "__lessThan".into(), - }); self.rhs.compile(comp); self.lhs.compile(comp); - comp.push(OpCode::Call { arity: 2 }); + comp.push(OpCode::BinOp { op: BinOp::Lt }); comp.push(OpCode::UnOp { op: UnOp::Not }); } Geq => { - comp.push(OpCode::LookUp { - sym: "__lessThan".into(), - }); self.lhs.compile(comp); self.rhs.compile(comp); - comp.push(OpCode::Call { arity: 2 }); + comp.push(OpCode::BinOp { op: BinOp::Lt }); comp.push(OpCode::UnOp { op: UnOp::Not }); } diff --git a/src/ir.rs b/src/ir.rs index 99f0b45..01c1f4e 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -13,6 +13,8 @@ pub fn downgrade(expr: Expr) -> Result { Ok(Downgraded { top_level: ir, consts: ctx.consts.into(), + symbols: ctx.symbols, + symmap: ctx.symmap, thunks: ctx.thunks.into(), funcs: ctx.funcs.into(), }) @@ -96,7 +98,7 @@ macro_rules! ir { } ir! { - Attrs => { stcs: HashMap, dyns: Vec, rec: bool }, + Attrs => { stcs: HashMap, dyns: Vec, rec: bool }, List => { items: Vec }, HasAttr => { lhs: Box, rhs: Vec }, BinOp => { lhs: Box, rhs: Box, kind: BinOpKind }, @@ -111,7 +113,7 @@ ir! { Assert => { assertion: Box, expr: Box }, ConcatStrings => { parts: Vec }, Const => { value: i::Const }, - Var => { sym: EcoString }, + Var => { sym: usize }, #[derive(Copy)] Thunk => { idx: usize }, Path => { expr: Box }, @@ -125,11 +127,15 @@ pub struct DowngradeContext { thunks: Vec, funcs: Vec, consts: Vec, + symbols: Vec, + symmap: HashMap, } pub struct Downgraded { pub top_level: Ir, pub consts: Box<[i::Const]>, + pub symbols: Vec, + pub symmap: HashMap, pub thunks: Box<[Ir]>, pub funcs: Box<[Func]>, } @@ -150,6 +156,17 @@ impl DowngradeContext { self.funcs.push(func); LoadFunc { idx } } + + fn new_sym(&mut self, sym: impl Into) -> usize { + let sym = sym.into(); + if let Some(&idx) = self.symmap.get(&sym) { + idx + } else { + self.symmap.insert(sym.clone(), self.symbols.len()); + self.symbols.push(sym); + self.symbols.len() - 1 + } + } } impl Attrs { @@ -231,7 +248,7 @@ impl Attrs { match path.next() { Some(Attr::Str(ident)) => self .stcs - .get(ident.as_str()) + .get(ident) .and_then(|attrs| attrs.downcast_ref()) .map_or(Some(false), |attrs: &Attrs| attrs._has_attr(path, name)), None => match name { @@ -253,7 +270,7 @@ impl Attrs { pub enum Attr { Dynamic(Ir), Strs(ConcatStrings), - Str(EcoString), + Str(usize), } #[derive(Clone, Debug)] @@ -327,11 +344,11 @@ pub struct Func { #[derive(Clone, Debug)] pub enum Param { - Ident(EcoString), + Ident(usize), Formals { - formals: Vec<(EcoString, Option)>, + formals: Vec<(usize, Option)>, ellipsis: bool, - alias: Option, + alias: Option, }, } @@ -460,9 +477,9 @@ impl Downgrade for ast::Literal { } impl Downgrade for ast::Ident { - fn downgrade(self, _ctx: &mut DowngradeContext) -> Result { + fn downgrade(self, ctx: &mut DowngradeContext) -> Result { Var { - sym: self.to_string().into(), + sym: ctx.new_sym(self.to_string()), } .ir() .ok() @@ -547,7 +564,7 @@ impl Downgrade for ast::LegacyLet { let attrs = downgrade_has_entry(self, true, ctx)?; Select { expr: attrs.ir().boxed(), - attrpath: vec![Attr::Str("body".to_string().into())], + attrpath: vec![Attr::Str(ctx.new_sym("body".to_string()))], default: None, } .ir() @@ -603,7 +620,7 @@ impl Downgrade for ast::Apply { fn downgrade_param(param: ast::Param, ctx: &mut DowngradeContext) -> Result { match param { - ast::Param::IdentParam(ident) => Ok(Param::Ident(ident.to_string().into())), + ast::Param::IdentParam(ident) => Ok(Param::Ident(ctx.new_sym(ident.to_string()))), ast::Param::Pattern(pattern) => downgrade_pattern(pattern, ctx), } } @@ -612,7 +629,7 @@ fn downgrade_pattern(pattern: ast::Pattern, ctx: &mut DowngradeContext) -> Resul let formals = pattern .pat_entries() .map(|entry| { - let ident = entry.ident().unwrap().to_string().into(); + let ident = ctx.new_sym(entry.ident().unwrap().to_string()); if entry.default().is_none() { Ok((ident, None)) } else { @@ -627,7 +644,7 @@ fn downgrade_pattern(pattern: ast::Pattern, ctx: &mut DowngradeContext) -> Resul let ellipsis = pattern.ellipsis_token().is_some(); let alias = pattern .pat_bind() - .map(|alias| alias.ident().unwrap().to_string().into()); + .map(|alias| ctx.new_sym(alias.ident().unwrap().to_string())); Ok(Param::Formals { formals, ellipsis, @@ -659,7 +676,7 @@ fn downgrade_has_entry( fn downgrade_inherit( inherit: ast::Inherit, - stcs: &mut HashMap, + stcs: &mut HashMap, ctx: &mut DowngradeContext, ) -> Result<()> { let from = if let Some(from) = inherit.from() { @@ -669,8 +686,8 @@ fn downgrade_inherit( None }; for attr in inherit.attrs() { - let ident: EcoString = match downgrade_attr(attr, ctx)? { - Attr::Str(ident) => ident.to_string().into(), + let ident = match downgrade_attr(attr, ctx)? { + Attr::Str(ident) => ctx.new_sym(ident.to_string()), _ => { return Err(Error::DowngradeError( "dynamic attributes not allowed in inherit".to_string(), @@ -678,11 +695,11 @@ fn downgrade_inherit( } }; let expr = from.map_or_else( - || Var { sym: ident.clone() }.ir().ok(), + || Var { sym: ident }.ir().ok(), |from| { Ok(Select { expr: from.ir().boxed(), - attrpath: vec![Attr::Str(ident.clone())], + attrpath: vec![Attr::Str(ident)], default: None, } .ir()) @@ -697,14 +714,14 @@ fn downgrade_attr(attr: ast::Attr, ctx: &mut DowngradeContext) -> Result { use ast::Attr::*; use ast::InterpolPart::*; match attr { - Ident(ident) => Ok(Attr::Str(ident.to_string().into())), + Ident(ident) => Ok(Attr::Str(ctx.new_sym(ident.to_string()))), Str(string) => { let parts = string.normalized_parts(); if parts.len() == 0 { - Ok(Attr::Str("".into())) + Ok(Attr::Str(ctx.new_sym(""))) } else if parts.len() == 1 { match parts.into_iter().next().unwrap() { - Literal(ident) => Ok(Attr::Str(ident.into())), + Literal(ident) => Ok(Attr::Str(ctx.new_sym(ident))), Interpolation(interpol) => { Ok(Attr::Dynamic(interpol.expr().unwrap().downgrade(ctx)?)) } diff --git a/src/ty/common.rs b/src/ty/common.rs index 8b24edc..657fa34 100644 --- a/src/ty/common.rs +++ b/src/ty/common.rs @@ -1,11 +1,7 @@ use std::fmt::{Display, Formatter, Result as FmtResult}; -use std::ops::Deref; - -use std::sync::LazyLock; use derive_more::Constructor; -use ecow::EcoString; -use regex::Regex; + #[derive(Clone, Debug, PartialEq, Constructor)] pub struct Catchable { @@ -18,46 +14,3 @@ impl Display for Catchable { } } -#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Constructor)] -pub struct Symbol(EcoString); - -impl> From for Symbol { - fn from(value: T) -> Self { - Symbol(value.into()) - } -} - -impl Display for Symbol { - fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { - if self.normal() { - write!(f, r#""{}""#, self.0) - } else { - write!(f, "{}", self.0) - } - } -} - -static REGEX: LazyLock = - LazyLock::new(|| Regex::new(r#"^[a-zA-Z\_][a-zA-Z0-9\_\'\-]*$"#).unwrap()); -impl Symbol { - fn normal(&self) -> bool { - !REGEX.is_match(self) - } -} - -impl Deref for Symbol { - type Target = str; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl Symbol { - pub fn into_inner(self) -> EcoString { - self.0 - } - - pub fn as_inner(&self) -> &EcoString { - &self.0 - } -} diff --git a/src/ty/internal/attrset.rs b/src/ty/internal/attrset.rs index da1890e..9ca0b9f 100644 --- a/src/ty/internal/attrset.rs +++ b/src/ty/internal/attrset.rs @@ -5,15 +5,13 @@ use rpds::HashTrieMap; use crate::error::Result; use crate::vm::{Env, VM}; - -use super::super::common::Symbol; use super::super::public as p; use super::{ToPublic, Value}; #[repr(C)] #[derive(Debug, Constructor, Clone, PartialEq)] pub struct AttrSet<'vm> { - data: HashTrieMap>, + data: HashTrieMap>, } impl<'vm> AttrSet<'vm> { @@ -23,22 +21,22 @@ impl<'vm> AttrSet<'vm> { } } - pub fn push_attr_force(&mut self, sym: Symbol, val: Value<'vm>) { + pub fn push_attr_force(&mut self, sym: usize, val: Value<'vm>) { self.data.insert_mut(sym, val); } - pub fn push_attr(&mut self, sym: Symbol, val: Value<'vm>) { + pub fn push_attr(&mut self, sym: usize, val: Value<'vm>) { if self.data.get_mut(&sym).is_some() { todo!() } self.data.insert_mut(sym, val); } - pub fn select(&self, sym: &Symbol) -> Option> { + pub fn select(&self, sym: usize) -> Option> { self.data.get(&sym).cloned() } - pub fn has_attr(&self, sym: &Symbol) -> bool { + pub fn has_attr(&self, sym: usize) -> bool { self.data.get(&sym).is_some() } @@ -70,11 +68,11 @@ impl<'vm> AttrSet<'vm> { self } - pub fn into_inner(self) -> HashTrieMap> { + pub fn into_inner(self) -> HashTrieMap> { self.data } - pub fn as_inner(&self) -> &HashTrieMap> { + pub fn as_inner(&self) -> &HashTrieMap> { &self.data } @@ -98,7 +96,7 @@ impl ToPublic for AttrSet<'_> { p::Value::AttrSet(p::AttrSet::new( self.data .iter() - .map(|(sym, value)| (sym.clone(), value.clone().to_public(vm))) + .map(|(&sym, value)| (vm.get_sym(sym), value.clone().to_public(vm))) .collect(), )) } @@ -116,22 +114,22 @@ impl<'vm> RecAttrSet<'vm> { } } - pub fn push_attr_force(&mut self, sym: Symbol, val: Value<'vm>) { + pub fn push_attr_force(&mut self, sym: usize, val: Value<'vm>) { self.data.insert(sym, val); } - pub fn push_attr(&mut self, sym: Symbol, val: Value<'vm>) { - if self.data.lookup(&sym).is_some() { + pub fn push_attr(&mut self, sym: usize, val: Value<'vm>) { + if self.data.lookup(sym).is_some() { todo!() } self.data.insert(sym, val); } - pub fn select(&self, sym: &Symbol) -> Option> { + pub fn select(&self, sym: usize) -> Option> { self.data.lookup(sym) } - pub fn has_attr(&self, sym: &Symbol) -> bool { + pub fn has_attr(&self, sym: usize) -> bool { self.data.lookup(sym).is_some() } @@ -189,7 +187,7 @@ impl ToPublic for RecAttrSet<'_> { .map .borrow() .iter() - .map(|(sym, value)| (sym.clone(), value.clone().to_public(vm))) + .map(|(&sym, value)| (vm.get_sym(sym), value.clone().to_public(vm))) .collect(), )) } diff --git a/src/ty/internal/func.rs b/src/ty/internal/func.rs index 98c9751..f5578be 100644 --- a/src/ty/internal/func.rs +++ b/src/ty/internal/func.rs @@ -1,6 +1,5 @@ use std::rc::Rc; -use ecow::EcoString; use itertools::Itertools; use rpds::HashTrieMap; use derive_more::Constructor; @@ -13,11 +12,11 @@ use crate::vm::{Env, VM}; #[derive(Debug, Clone)] pub enum Param { - Ident(EcoString), + Ident(usize), Formals { - formals: Vec<(EcoString, Option)>, + formals: Vec<(usize, Option)>, ellipsis: bool, - alias: Option, + alias: Option, }, } @@ -69,7 +68,7 @@ impl<'vm> Func<'vm> { && arg .as_inner() .iter() - .map(|(k, _)| k.as_inner()) + .map(|(k, _)| k) .sorted() .ne(formals.iter().map(|(k, _)| k).sorted()) { @@ -78,7 +77,7 @@ impl<'vm> Func<'vm> { for (formal, default) in formals { let formal = formal.clone().into(); let arg = arg - .select(&formal) + .select(formal) .or_else(|| default.map(|idx| Value::Thunk(Thunk::new(vm.get_thunk(idx))))) .unwrap(); new.insert_mut(formal, arg); diff --git a/src/ty/internal/mod.rs b/src/ty/internal/mod.rs index 5e8aa4e..8b8beb3 100644 --- a/src/ty/internal/mod.rs +++ b/src/ty/internal/mod.rs @@ -7,8 +7,6 @@ use derive_more::{IsVariant, Unwrap}; use super::common as c; use super::public as p; -use c::Symbol; - use crate::bytecode::OpCodes; use crate::error::*; use crate::vm::{Env, VM}; @@ -285,7 +283,7 @@ impl<'vm> Value<'vm> { } } - pub fn push_attr(&mut self, sym: Symbol, val: Value<'vm>) -> &mut Self { + pub fn push_attr(&mut self, sym: usize, val: Value<'vm>) -> &mut Self { if let Value::AttrSet(attrs) = self { attrs.push_attr(sym, val) } else if let Value::RecAttrSet(attrs) = self { @@ -309,7 +307,7 @@ impl<'vm> Value<'vm> { } } - pub fn select(&mut self, sym: &Symbol) -> Result<&mut Self> { + pub fn select(&mut self, sym: usize) -> Result<&mut Self> { let val = match self { Value::AttrSet(attrs) => attrs .select(sym) @@ -327,7 +325,7 @@ impl<'vm> Value<'vm> { Ok(self) } - pub fn select_with_default(&mut self, sym: &Symbol, default: Value<'vm>) -> Result<&mut Self> { + pub fn select_with_default(&mut self, sym: usize, default: Value<'vm>) -> Result<&mut Self> { let val = match self { Value::AttrSet(attrs) => attrs.select(sym).unwrap_or(default), Value::RecAttrSet(attrs) => attrs.select(sym).unwrap_or(default), @@ -343,7 +341,7 @@ impl<'vm> Value<'vm> { Ok(self) } - pub fn has_attr(&mut self, sym: &Symbol) -> &mut Self { + pub fn has_attr(&mut self, sym: usize) -> &mut Self { if let Value::AttrSet(attrs) = self { let val = VmConst(Const::Bool(attrs.has_attr(sym))); *self = val; diff --git a/src/ty/public/mod.rs b/src/ty/public/mod.rs index 9c6cff9..d328f0c 100644 --- a/src/ty/public/mod.rs +++ b/src/ty/public/mod.rs @@ -1,7 +1,11 @@ use std::fmt::{Debug, Display, Formatter, Result as FmtResult}; +use std::ops::Deref; +use std::sync::LazyLock; use derive_more::{Constructor, IsVariant, Unwrap}; use rpds::{HashTrieMap, VectorSync}; +use ecow::EcoString; +use regex::Regex; use super::common::*; @@ -9,6 +13,50 @@ mod cnst; pub use cnst::Const; +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Constructor)] +pub struct Symbol(EcoString); + +impl> From for Symbol { + fn from(value: T) -> Self { + Symbol(value.into()) + } +} + +impl Display for Symbol { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + if self.normal() { + write!(f, r#""{}""#, self.0) + } else { + write!(f, "{}", self.0) + } + } +} + +static REGEX: LazyLock = + LazyLock::new(|| Regex::new(r#"^[a-zA-Z\_][a-zA-Z0-9\_\'\-]*$"#).unwrap()); +impl Symbol { + fn normal(&self) -> bool { + !REGEX.is_match(self) + } +} + +impl Deref for Symbol { + type Target = str; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Symbol { + pub fn into_inner(self) -> EcoString { + self.0 + } + + pub fn as_inner(&self) -> &EcoString { + &self.0 + } +} + #[derive(Constructor, Clone, PartialEq)] pub struct AttrSet { data: HashTrieMap, diff --git a/src/vm/env.rs b/src/vm/env.rs index d42eff0..747df11 100644 --- a/src/vm/env.rs +++ b/src/vm/env.rs @@ -3,13 +3,12 @@ use std::rc::Rc; use rpds::HashTrieMap; -use crate::ty::common::Symbol; use crate::ty::internal::Value; #[derive(Debug, Default, PartialEq)] pub struct Env<'vm> { last: RefCell>>>, - pub map: RefCell>>, + pub map: RefCell>>, } impl Clone for Env<'_> { @@ -31,15 +30,15 @@ impl<'vm> Env<'vm> { Env::default() } - pub fn lookup(&self, symbol: &Symbol) -> Option> { - self.map.borrow().get(symbol).cloned() + pub fn lookup(&self, symbol: usize) -> Option> { + self.map.borrow().get(&symbol).cloned() } - pub fn insert(&self, symbol: Symbol, value: Value<'vm>) { + pub fn insert(&self, symbol: usize, value: Value<'vm>) { self.map.borrow_mut().insert_mut(symbol, value); } - pub fn enter(&self, new: HashTrieMap>) { + pub fn enter(&self, new: HashTrieMap>) { let mut map = self.map.borrow().clone(); for (k, v) in new.iter() { map.insert_mut(k.clone(), v.clone()); diff --git a/src/vm/mod.rs b/src/vm/mod.rs index f29958c..9db67d1 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -1,14 +1,17 @@ +use std::cell::RefCell; +use std::collections::HashMap; use std::rc::Rc; use crate::builtins::env; use crate::bytecode::{BinOp, OpCode, OpCodes, Program, UnOp, Func as F}; use crate::error::*; -use crate::ty::common::Symbol; use crate::ty::internal::*; -use crate::ty::public as p; +use crate::ty::public::{self as p, Symbol}; use crate::stack::Stack; +use derive_more::Constructor; +use ecow::EcoString; pub use env::Env; pub use jit::JITContext; @@ -24,37 +27,47 @@ pub fn run(prog: Program, jit: JITContext<'_>) -> Result { let vm = VM::new( prog.thunks, prog.funcs, + RefCell::new(prog.symbols), + RefCell::new(prog.symmap), jit ); - let env = env(); + let env = env(&vm); let temp = vm.eval(prog.top_level, env)?; let temp = temp.to_public(&vm); Ok(temp) } +#[derive(Constructor)] pub struct VM<'jit> { thunks: Box<[OpCodes]>, funcs: Box<[F]>, + symbols: RefCell>, + symmap: RefCell>, jit: JITContext<'jit>, } impl<'vm, 'jit: 'vm> VM<'jit> { - fn new(thunks: Box<[OpCodes]>, funcs: Box<[F]>, jit: JITContext<'jit>) -> Self { - VM { thunks, funcs, jit } - } - pub fn get_thunk(&self, idx: usize) -> &OpCodes { - // SAFETY: The `idx` is within bounds as `thunks` is initialized with `prog.thunks` - // and `idx` is expected to be a valid index into this collection. - // The lifetime of the returned reference is tied to `&self`. - unsafe { &*(&self.thunks[idx] as *const _) } + &self.thunks[idx] } pub fn get_func(&self, idx: usize) -> &F { - // SAFETY: The `idx` is within bounds as `funcs` is initialized with `prog.funcs` - // and `idx` is expected to be a valid index into this collection. - // The lifetime of the returned reference is tied to `&self`. - unsafe { &*(&self.funcs[idx] as *const _) } + &self.funcs[idx] + } + + pub fn get_sym(&self, idx: usize) -> Symbol{ + self.symbols.borrow()[idx].clone().into() + } + + pub fn new_sym(&self, sym: impl Into) -> usize { + let sym = sym.into(); + if let Some(&idx) = self.symmap.borrow().get(&sym) { + idx + } else { + self.symmap.borrow_mut().insert(sym.clone(), self.symbols.borrow().len()); + self.symbols.borrow_mut().push(sym); + self.symbols.borrow().len() - 1 + } } pub fn eval(&'vm self, opcodes: OpCodes, env: Rc>) -> Result> { @@ -123,6 +136,7 @@ impl<'vm, 'jit: 'vm> VM<'jit> { let mut value = stack.pop(); value.force(self)?; stack.push(match op { + Neg => value.neg(), Not => value.not(), })?; } @@ -134,9 +148,13 @@ impl<'vm, 'jit: 'vm> VM<'jit> { rhs.force(self)?; stack.push(match op { Add => lhs.add(rhs), + Sub => lhs.add(rhs.neg()), + Mul => lhs.mul(rhs), + Div => lhs.div(rhs), And => lhs.and(rhs), Or => lhs.or(rhs), Eq => lhs.eq(rhs), + Lt => lhs.lt(rhs), Con => lhs.concat(rhs), Upd => lhs.update(rhs), })?; @@ -169,57 +187,56 @@ impl<'vm, 'jit: 'vm> VM<'jit> { } OpCode::PushStaticAttr { name } => { let val = stack.pop(); - stack.tos_mut()?.push_attr(Symbol::new(name.clone()), val); + stack.tos_mut()?.push_attr(name, val); } OpCode::PushDynamicAttr => { let val = stack.pop(); let mut sym = stack.pop(); sym.force(self)?.coerce_to_string(); - let sym = sym.unwrap_const().unwrap_string().into(); + let sym = self.new_sym(sym.unwrap_const().unwrap_string()); stack.tos_mut()?.push_attr(sym, val); } OpCode::Select { sym } => { - stack.tos_mut()?.force(self)?.select(&Symbol::new(sym))?; + stack.tos_mut()?.force(self)?.select(sym)?; } OpCode::SelectOrDefault { sym } => { let default = stack.pop(); stack .tos_mut()? .force(self)? - .select_with_default(&Symbol::new(sym), default)?; + .select_with_default(sym, default)?; } OpCode::SelectDynamic => { let mut val = stack.pop(); val.force(self)?; val.coerce_to_string(); - let sym = val.unwrap_const().unwrap_string().into(); - stack.tos_mut()?.force(self)?.select(&sym)?; + let sym = self.new_sym(val.unwrap_const().unwrap_string()); + stack.tos_mut()?.force(self)?.select(sym)?; } OpCode::SelectDynamicOrDefault => { let default = stack.pop(); let mut val = stack.pop(); val.force(self)?; val.coerce_to_string(); - let sym = val.unwrap_const().unwrap_string().into(); + let sym = self.new_sym(val.unwrap_const().unwrap_string()); stack .tos_mut()? .force(self)? - .select_with_default(&sym, default)?; + .select_with_default(sym, default)?; } OpCode::HasAttr { sym } => { - stack.tos_mut()?.force(self)?.has_attr(&Symbol::new(sym)); + stack.tos_mut()?.force(self)?.has_attr(sym); } OpCode::HasDynamicAttr => { let mut val = stack.pop(); val.coerce_to_string(); - let sym = val.unwrap_const().unwrap_string().into(); - stack.tos_mut()?.force(self)?.has_attr(&sym); + let sym = self.new_sym(val.unwrap_const().unwrap_string()); + stack.tos_mut()?.force(self)?.has_attr(sym); } OpCode::LookUp { sym } => { - let sym = Symbol::new(sym); stack.push( - env.lookup(&sym) - .ok_or_else(|| Error::EvalError(format!(r#""{sym}" not found"#)))?, + env.lookup(sym) + .ok_or_else(|| Error::EvalError(format!("{} not found", self.get_sym(sym))))?, )?; } OpCode::EnterEnv => match stack.pop() { diff --git a/src/vm/test.rs b/src/vm/test.rs index 6b0d309..fe49255 100644 --- a/src/vm/test.rs +++ b/src/vm/test.rs @@ -8,7 +8,7 @@ use rpds::{ht_map, vector_sync}; use crate::compile::compile; use crate::ir::downgrade; -use crate::ty::common::Symbol; +use crate::ty::public::Symbol; use crate::ty::public::*; use crate::vm::JITContext;