feat: init Rust VM

This commit is contained in:
2026-03-14 11:42:39 +08:00
parent 40d00a6c47
commit 198d847151
26 changed files with 3620 additions and 993 deletions
+530
View File
@@ -1 +1,531 @@
use std::hash::BuildHasher;
use bumpalo::Bump;
use gc_arena::{Arena, Rootable, arena::CollectionPhase};
use ghost_cell::{GhostCell, GhostToken};
use hashbrown::{DefaultHashBuilder, HashMap, HashSet, HashTable};
use rnix::TextRange;
use string_interner::DefaultStringInterner;
use crate::codegen::{BytecodeContext, InstructionPtr};
use crate::downgrade::{Downgrade as _, DowngradeContext};
use crate::error::{Error, Result, Source};
use crate::ir::{ArgId, Ir, IrKey, IrRef, RawIrRef, StringId, ThunkId, ir_content_eq};
use crate::runtime::builtins::new_builtins_env;
use crate::store::{DaemonStore, StoreConfig};
use crate::value::Symbol;
mod builtins;
mod stack;
mod value;
mod vm;
use vm::{Action, VM};
/// Top-level evaluator state: the emitted bytecode, the interned strings, the
/// registered sources and the garbage-collected arena that holds the [`VM`].
pub struct Runtime {
/// Bytecode buffer; exposed to the code generator through `BytecodeContext`.
bytecode: Vec<u8>,
/// Global bindings keyed by interned name; built by `new_builtins_env`.
global_env: HashMap<StringId, Ir<'static, RawIrRef<'static>>>,
/// Every source handed to `downgrade`, in order; the last entry is the current source.
sources: Vec<Source>,
/// Store daemon connection opened in `Runtime::new`.
store: DaemonStore,
/// Registered spans as `(index into sources, range)`; a span's id is its position here.
spans: Vec<(usize, TextRange)>,
/// Next unused thunk id; lent to the downgrade context, which only ever increases it.
thunk_count: usize,
/// Interner backing every `StringId` created by this runtime.
strings: DefaultStringInterner,
/// GC arena whose root object is the `VM`.
arena: Arena<Rootable![VM<'_>]>,
}
impl Runtime {
// NOTE(review): not referenced anywhere in the visible code; presumably a GC
// pacing parameter — confirm its intended use before relying on it.
const COLLECTOR_GRANULARITY: f64 = 1024.0;
/// Creates a runtime with the builtin global environment, an empty bytecode
/// buffer and a fresh VM arena.
///
/// # Errors
/// Returns the error produced by `DaemonStore::connect` when the daemon socket
/// named by the environment-derived `StoreConfig` cannot be opened.
pub fn new() -> Result<Self> {
    let mut interner = DefaultStringInterner::new();
    // The builtins environment interns its names, so it needs the interner first.
    let builtins = new_builtins_env(&mut interner);
    let store_config = StoreConfig::from_env();
    let daemon = DaemonStore::connect(&store_config.daemon_socket)?;
    let runtime = Self {
        bytecode: Vec::new(),
        global_env: builtins,
        sources: Vec::new(),
        store: daemon,
        spans: Vec::new(),
        thunk_count: 0,
        strings: interner,
        arena: Arena::new(|mc| VM::new(mc)),
    };
    Ok(runtime)
}
/// Evaluates `source`: lowers it to IR, compiles the IR to bytecode and runs
/// the VM from the resulting entry point.
///
/// # Errors
/// Propagates errors from lowering (`downgrade`) and from running the VM.
pub fn eval(&mut self, source: Source) -> Result<crate::value::Value> {
    let ir = self.downgrade(source, None)?;
    let entry = crate::codegen::compile_bytecode(ir.as_ref(), self);
    self.run(entry)
}
/// Shallow evaluation entry point.
///
/// # Panics
/// Not implemented yet: every call panics through `todo!()`.
pub fn eval_shallow(&mut self, _source: Source) -> Result<crate::value::Value> {
todo!()
}
/// Evaluates `source` through the same lower → compile → run pipeline as `eval`.
///
/// # Errors
/// Propagates errors from lowering (`downgrade`) and from running the VM.
pub fn eval_deep(&mut self, source: Source) -> Result<crate::value::Value> {
    // FIXME: deep
    let ir = self.downgrade(source, None)?;
    let entry = crate::codegen::compile_bytecode(ir.as_ref(), self);
    self.run(entry)
}
/// Evaluates `source` with the names in `scope` visible as REPL bindings.
///
/// The names are pushed as an extra `Scope::Repl` on top of the global scope
/// while lowering; the rest of the pipeline matches `eval`.
///
/// # Errors
/// Propagates errors from lowering (`downgrade`) and from running the VM.
pub fn eval_repl(
    &mut self,
    source: Source,
    scope: &HashSet<StringId>,
) -> Result<crate::value::Value> {
    // FIXME: shallow
    let repl_scope = Scope::Repl(scope);
    let ir = self.downgrade(source, Some(repl_scope))?;
    let entry = crate::codegen::compile_bytecode(ir.as_ref(), self);
    self.run(entry)
}
/// Entry point for adding a REPL binding.
///
/// # Panics
/// Not implemented yet: every call panics through `todo!()`; all parameters
/// are currently unused.
pub fn add_binding(
&mut self,
_ident: &str,
_expr: &str,
_scope: &mut HashSet<StringId>,
) -> Result<crate::value::Value> {
todo!()
}
fn downgrade_ctx<'a, 'bump, 'id>(
&'a mut self,
bump: &'bump Bump,
token: GhostToken<'id>,
extra_scope: Option<Scope<'a>>,
) -> DowngradeCtx<'a, 'id, 'bump> {
let Runtime {
global_env,
sources,
thunk_count,
strings,
..
} = self;
DowngradeCtx {
bump,
token,
strings,
source: sources.last().unwrap().clone(),
scopes: [Scope::Global(global_env)].into_iter().chain(extra_scope.into_iter()).collect(),
with_scope_count: 0,
arg_count: 0,
thunk_count,
thunk_scopes: vec![ThunkScope::new_in(bump)],
}
}
/// Parses `source` and lowers it to IR, returning the IR together with the
/// bump arena it was allocated in.
///
/// `extra_scope`, when present, is made visible on top of the global scope
/// during lowering.
///
/// # Errors
/// Returns an error when `handle_parse_error` reports one, when the parsed
/// tree has no root expression ("unexpected EOF"), or when lowering fails.
fn downgrade<'a>(&'a mut self, source: Source, extra_scope: Option<Scope<'a>>) -> Result<OwnedIr> {
tracing::debug!("Parsing Nix expression");
// Register the source first: `downgrade_ctx` and `register_span` both treat
// the last entry of `self.sources` as the current source.
self.sources.push(source.clone());
let root = rnix::Root::parse(&source.src);
// Turn a reported parse error into an early `Err` return.
handle_parse_error(root.errors(), source).map_or(Ok(()), Err)?;
tracing::debug!("Downgrading Nix expression");
let expr = root
.tree()
.expr()
.ok_or_else(|| Error::parse_error("unexpected EOF".into()))?;
let bump = Bump::new();
GhostToken::new(|token| {
let ir = self
.downgrade_ctx(&bump, token, extra_scope)
.downgrade_toplevel(expr)?;
// SAFETY: `ir` borrows from `bump`, and `bump` is moved into the same
// `OwnedIr` right below, so the arena is kept alive exactly as long as the
// erased reference; `OwnedIr::as_ref` re-shortens the lifetime on access.
// NOTE(review): this relies on `Bump` keeping its allocations at stable
// addresses when the `Bump` value itself is moved — confirm against bumpalo.
let ir = unsafe { std::mem::transmute::<RawIrRef<'_>, RawIrRef<'static>>(ir) };
Ok(OwnedIr { _bump: bump, ir })
})
}
/// Drives the VM starting at `ip` until it reports completion.
///
/// The VM runs in batches; between batches this loop services whatever the VM
/// asked for: a garbage-collection step, or nothing at all for `Continue`.
///
/// # Panics
/// Panics through `todo!()` when the VM issues an I/O request.
fn run(&mut self, ip: InstructionPtr) -> Result<crate::value::Value> {
    let mut pc = ip.0;
    loop {
        let step = {
            // Split the borrow: the arena is mutated while the bytecode and the
            // interner are handed to the VM.
            let Runtime {
                bytecode,
                strings,
                arena,
                ..
            } = self;
            arena.mutate_root(|mc, root| root.run_batch(bytecode, &mut pc, mc, strings))
        };
        match step {
            Action::Continue => {}
            Action::Done(outcome) => return outcome,
            Action::IoRequest(_) => todo!(),
            Action::NeedGc => match self.arena.collection_phase() {
                // Already sweeping: keep paying off the collection debt.
                CollectionPhase::Sweeping => {
                    self.arena.collect_debt();
                }
                // Otherwise mark, and switch to sweeping once marking finished.
                _ => {
                    if let Some(marked) = self.arena.mark_debt() {
                        marked.start_sweeping();
                    }
                }
            },
        }
    }
}
}
fn parse_error_span(error: &rnix::ParseError) -> Option<rnix::TextRange> {
use rnix::ParseError::*;
match error {
Unexpected(range)
| UnexpectedExtra(range)
| UnexpectedWanted(_, range, _)
| UnexpectedDoubleBind(range)
| DuplicatedArgs(range, _) => Some(*range),
_ => None,
}
}
/// Converts the parser's error list into at most one reportable [`Error`].
///
/// The first error that carries a source range is preferred. If errors exist
/// but none of them carries a range, the first one is still reported, with an
/// empty range at the end of the input as a best-effort location, so that a
/// failed parse is never mistaken for a successful one.
///
/// Returns `None` only when `errors` is empty.
fn handle_parse_error<'a>(
    errors: impl IntoIterator<Item = &'a rnix::ParseError>,
    source: Source,
) -> Option<Box<Error>> {
    let mut first_spanless = None;
    let mut first_spanned = None;
    for err in errors {
        if let Some(span) = parse_error_span(err) {
            first_spanned = Some((err, span));
            break;
        }
        if first_spanless.is_none() {
            first_spanless = Some(err);
        }
    }
    let (err, span) = match first_spanned {
        Some(found) => found,
        None => {
            let err = first_spanless?;
            // No location available: point at the end of the input. Computed
            // here because `source` is moved into the error below.
            let end = rnix::TextSize::of(&*source.src);
            (err, rnix::TextRange::empty(end))
        }
    };
    Some(
        Error::parse_error(err.to_string())
            .with_source(source)
            .with_span(span),
    )
}
/// Mutable state threaded through the lowering of one source into IR.
struct DowngradeCtx<'ctx, 'id, 'ir> {
/// Arena in which the IR nodes created by this context are allocated.
bump: &'ir Bump,
/// Token needed to borrow the contents of `IrRef` cells.
token: GhostToken<'id>,
/// String interner used by `new_sym` / `get_sym`.
strings: &'ctx mut DefaultStringInterner,
/// Source being lowered; returned (cloned) by `get_current_source`.
source: Source,
/// Lexical scope stack, innermost last; `lookup` walks it in reverse.
scopes: Vec<Scope<'ctx>>,
/// Number of enclosing `with` scopes; when non-zero, names that no scope
/// resolves become `Ir::WithLookup` instead of an error.
with_scope_count: u32,
/// Next `ArgId` to hand out.
arg_count: u32,
/// Next `ThunkId` to hand out.
thunk_count: &'ctx mut usize,
/// Stack of thunk scopes; new thunks are recorded in the last entry.
thunk_scopes: Vec<ThunkScope<'id, 'ir>>,
}
/// Decides whether `ir` needs to be wrapped in a thunk.
///
/// Literals, builtin references and nodes that already are thunks are used
/// directly; every other node kind is thunked.
fn should_thunk<'id>(ir: IrRef<'id, '_>, token: &GhostToken<'id>) -> bool {
    match ir.borrow(token) {
        // Literal values.
        Ir::Int(_) | Ir::Float(_) | Ir::Bool(_) | Ir::Str(_) | Ir::Null => false,
        // References to builtins.
        Ir::Builtin(_) | Ir::Builtins => false,
        // Already a thunk reference.
        Ir::Thunk(_) => false,
        _ => true,
    }
}
impl<'ctx, 'id, 'ir> DowngradeCtx<'ctx, 'id, 'ir> {
    /// Creates a lowering context for `source`.
    ///
    /// The scope stack starts with the global scope, followed by `extra_scope`
    /// when one is given; both counters start at zero and a single root thunk
    /// scope is opened in `bump`.
    fn new(
        bump: &'ir Bump,
        token: GhostToken<'id>,
        symbols: &'ctx mut DefaultStringInterner,
        global: &'ctx HashMap<StringId, Ir<'static, RawIrRef<'static>>>,
        extra_scope: Option<Scope<'ctx>>,
        thunk_count: &'ctx mut usize,
        source: Source,
    ) -> Self {
        let mut scopes = vec![Scope::Global(global)];
        scopes.extend(extra_scope);
        let root_thunk_scope = ThunkScope::new_in(bump);
        Self {
            bump,
            token,
            strings: symbols,
            source,
            scopes,
            with_scope_count: 0,
            arg_count: 0,
            thunk_count,
            thunk_scopes: vec![root_thunk_scope],
        }
    }
}
impl<'ctx: 'ir, 'id, 'ir> DowngradeContext<'id, 'ir> for DowngradeCtx<'ctx, 'id, 'ir> {
/// Allocates `expr` in the bump arena and returns a reference to it.
fn new_expr(&self, expr: Ir<'ir, IrRef<'id, 'ir>>) -> IrRef<'id, 'ir> {
    let cell = self.bump.alloc(GhostCell::new(expr));
    IrRef::new(cell)
}
/// Hands out the next unused argument id.
///
/// # Panics
/// Panics with "arg id overflow" once the `u32` id space is exhausted, rather
/// than wrapping around and handing out an id that is already in use.
fn new_arg(&mut self) -> ArgId {
    let id = self.arg_count;
    self.arg_count = id.checked_add(1).expect("arg id overflow");
    ArgId(id)
}
/// Wraps `ir` in a thunk unless `should_thunk` says it can be used directly.
///
/// Within the current thunk scope, nodes with equal content share one thunk:
/// the scope's cache is consulted first and a new id is allocated only on a
/// miss.
///
/// # Panics
/// Panics if there is no active thunk scope or the thunk id space overflows.
fn maybe_thunk(&mut self, ir: IrRef<'id, 'ir>) -> IrRef<'id, 'ir> {
    if !should_thunk(ir, &self.token) {
        return ir;
    }
    let scope = self.thunk_scopes.last().expect("no active cache scope");
    let id = match scope.lookup_cache(ir, &self.token) {
        Some(existing) => existing,
        None => {
            let fresh = ThunkId(*self.thunk_count);
            *self.thunk_count = self.thunk_count.checked_add(1).expect("thunk id overflow");
            self.thunk_scopes
                .last_mut()
                .expect("no active cache scope")
                .add_binding(fresh, ir, &self.token);
            fresh
        }
    };
    IrRef::alloc(self.bump, Ir::Thunk(id))
}
/// Interns `sym` and returns its id; an already interned string keeps its id.
fn new_sym(&mut self, sym: String) -> StringId {
    let interned = self.strings.get_or_intern(sym);
    StringId(interned)
}
/// Resolves an interned id back to its text.
///
/// # Panics
/// Panics if `id` is unknown to the interner.
fn get_sym(&self, id: StringId) -> Symbol<'_> {
    let text = self.strings.resolve(id.0).expect("no symbol found");
    text.into()
}
fn lookup(&self, sym: StringId, span: TextRange) -> Result<IrRef<'id, 'ir>> {
for scope in self.scopes.iter().rev() {
match scope {
&Scope::Global(global_scope) => {
if let Some(expr) = global_scope.get(&sym) {
let ir = match expr {
Ir::Builtins => Ir::Builtins,
Ir::Builtin(s) => Ir::Builtin(*s),
Ir::Bool(b) => Ir::Bool(*b),
Ir::Null => Ir::Null,
_ => unreachable!("globals should only contain leaf IR nodes"),
};
return Ok(self.new_expr(ir));
}
}
&Scope::Repl(repl_bindings) => {
if repl_bindings.contains(&sym) {
return Ok(self.new_expr(Ir::ReplBinding(sym)));
}
}
Scope::ScopedImport(scoped_bindings) => {
if scoped_bindings.contains(&sym) {
return Ok(self.new_expr(Ir::ScopedImportBinding(sym)));
}
}
Scope::Let(let_scope) => {
if let Some(&expr) = let_scope.get(&sym) {
return Ok(self.new_expr(Ir::Thunk(expr)));
}
}
&Scope::Param(param_sym, id) => {
if param_sym == sym {
return Ok(self.new_expr(Ir::Arg(id)));
}
}
}
}
if self.with_scope_count > 0 {
Ok(self.new_expr(Ir::WithLookup(sym)))
} else {
Err(Error::downgrade_error(
format!("'{}' not found", self.get_sym(sym)),
self.get_current_source(),
span,
))
}
}
/// Returns a clone of the source this context is lowering.
fn get_current_source(&self) -> Source {
    Clone::clone(&self.source)
}
/// Runs `f` inside a new `let` scope that binds `keys`.
///
/// One thunk id per key is reserved up front so the bindings can refer to
/// each other while `f` runs. `f` returns the lowered binding values (in key
/// order) plus its own result; the values are then registered under the
/// reserved ids in the current thunk scope. The scope is popped again on every
/// exit path, including an early return caused by an error from `f`.
///
/// # Errors
/// Propagates the error returned by `f`.
///
/// # Panics
/// Panics if the thunk id space overflows, if `f` returns a different number
/// of values than there are keys, or if there is no active thunk scope.
fn with_let_scope<F, R>(&mut self, keys: &[StringId], f: F) -> Result<R>
where
    F: FnOnce(&mut Self) -> Result<(bumpalo::collections::Vec<'ir, IrRef<'id, 'ir>>, R)>,
{
    let base = *self.thunk_count;
    // The checked add proves every id in `base..end` is representable, so the
    // ids can be produced from the range without any unchecked arithmetic.
    let end = base.checked_add(keys.len()).expect("thunk id overflow");
    *self.thunk_count = end;
    self.scopes.push(Scope::Let(
        keys.iter()
            .copied()
            .zip((base..end).map(ThunkId))
            .collect(),
    ));
    let (vals, ret) = {
        // The guard pops the scope pushed above when it goes out of scope.
        let mut guard = ScopeGuard { ctx: self };
        f(guard.as_ctx())?
    };
    assert_eq!(keys.len(), vals.len());
    let scope = self.thunk_scopes.last_mut().expect("no active thunk scope");
    scope.extend_bindings((base..end).map(ThunkId).zip(vals));
    Ok(ret)
}
/// Runs `f` with `param` bound to the argument `arg`.
///
/// The parameter scope is pushed before `f` runs and popped by a guard once
/// `f` has returned.
fn with_param_scope<F, R>(&mut self, param: StringId, arg: ArgId, f: F) -> R
where
    F: FnOnce(&mut Self) -> R,
{
    let param_scope = Scope::Param(param, arg);
    self.scopes.push(param_scope);
    let mut popper = ScopeGuard { ctx: self };
    let ctx = popper.as_ctx();
    f(ctx)
}
/// Runs `f` with one more `with` scope counted as active.
///
/// While the counter is non-zero, `lookup` turns otherwise unbound names into
/// dynamic `Ir::WithLookup` nodes instead of failing.
fn with_with_scope<F, R>(&mut self, f: F) -> R
where
    F: FnOnce(&mut Self) -> R,
{
    self.with_scope_count += 1;
    let outcome = f(self);
    self.with_scope_count -= 1;
    outcome
}
/// Runs `f` inside a fresh thunk scope.
///
/// Returns the result of `f` together with the thunk bindings that were
/// recorded in that scope while it was the innermost one.
///
/// # Panics
/// Panics if the thunk scope stack is empty after `f` returns.
fn with_thunk_scope<F, R>(
    &mut self,
    f: F,
) -> (
    R,
    bumpalo::collections::Vec<'ir, (ThunkId, IrRef<'id, 'ir>)>,
)
where
    F: FnOnce(&mut Self) -> R,
{
    let fresh_scope = ThunkScope::new_in(self.bump);
    self.thunk_scopes.push(fresh_scope);
    let outcome = f(self);
    let finished = self.thunk_scopes.pop().expect("no thunk scope left???");
    (outcome, finished.bindings)
}
/// Returns the bump arena this context allocates IR nodes in.
fn bump(&self) -> &'ir bumpalo::Bump {
self.bump
}
}
impl<'id, 'ir, 'ctx: 'ir> DowngradeCtx<'ctx, 'id, 'ir> {
fn downgrade_toplevel(mut self, root: rnix::ast::Expr) -> Result<RawIrRef<'ir>> {
let body = root.downgrade(&mut self)?;
let thunks = self
.thunk_scopes
.pop()
.expect("no thunk scope left???")
.bindings;
let ir = IrRef::alloc(self.bump, Ir::TopLevel { body, thunks });
Ok(ir.freeze(self.token))
}
}
/// Thunks collected for one region of the IR, plus a content-addressed cache
/// used to reuse a thunk for nodes with equal content.
struct ThunkScope<'id, 'ir> {
/// Thunk bindings of this scope, in insertion order.
bindings: bumpalo::collections::Vec<'ir, (ThunkId, IrRef<'id, 'ir>)>,
/// Lookup table from IR content to thunk id; compared with `ir_content_eq`
/// and only filled by `add_binding`.
cache: HashTable<(IrRef<'id, 'ir>, ThunkId)>,
/// Hasher used for every entry of `cache`.
hasher: DefaultHashBuilder,
}
impl<'id, 'ir> ThunkScope<'id, 'ir> {
    /// Creates an empty scope whose binding list is allocated in `bump`.
    fn new_in(bump: &'ir Bump) -> Self {
        let bindings = bumpalo::collections::Vec::new_in(bump);
        Self {
            bindings,
            cache: HashTable::new(),
            hasher: DefaultHashBuilder::default(),
        }
    }

    /// Returns the id of a cached thunk whose IR has the same content as `key`.
    fn lookup_cache(&self, key: IrRef<'id, 'ir>, token: &GhostToken<'id>) -> Option<ThunkId> {
        let hash = self.hasher.hash_one(IrKey(key, token));
        let hit = self
            .cache
            .find(hash, |&(candidate, _)| ir_content_eq(key, candidate, token))?;
        Some(hit.1)
    }

    /// Records `ir` under `id` and makes it discoverable through `lookup_cache`.
    ///
    /// The entry is inserted without checking for an equal one, so callers are
    /// expected to consult `lookup_cache` first.
    fn add_binding(&mut self, id: ThunkId, ir: IrRef<'id, 'ir>, token: &GhostToken<'id>) {
        let Self {
            bindings,
            cache,
            hasher,
        } = self;
        bindings.push((id, ir));
        let hash = hasher.hash_one(IrKey(ir, token));
        // The closure re-hashes existing entries when the table has to grow.
        cache.insert_unique(hash, (ir, id), |&(stored, _)| {
            hasher.hash_one(IrKey(stored, token))
        });
    }

    /// Appends bindings without adding them to the content cache.
    fn extend_bindings(&mut self, iter: impl IntoIterator<Item = (ThunkId, IrRef<'id, 'ir>)>) {
        self.bindings.extend(iter);
    }
}
/// One entry of the lexical scope stack consulted by `DowngradeCtx::lookup`.
enum Scope<'ctx> {
/// Global bindings; a hit is re-created as a leaf IR node.
Global(&'ctx HashMap<StringId, Ir<'static, RawIrRef<'static>>>),
/// Names bound in the REPL; a hit resolves to `Ir::ReplBinding`.
Repl(&'ctx HashSet<StringId>),
/// Names provided by a scoped import; a hit resolves to `Ir::ScopedImportBinding`.
ScopedImport(HashSet<StringId>),
/// `let` bindings mapped to their thunk ids; a hit resolves to `Ir::Thunk`.
Let(HashMap<StringId, ThunkId>),
/// A single function parameter; a hit resolves to `Ir::Arg`.
Param(StringId, ArgId),
}
/// Guard that pops the innermost entry of `ctx.scopes` when it is dropped, so
/// a scope pushed just before creating the guard is removed on every exit
/// path, including early returns.
struct ScopeGuard<'a, 'ctx, 'id, 'ir> {
/// Context whose scope stack is popped on drop.
ctx: &'a mut DowngradeCtx<'ctx, 'id, 'ir>,
}
impl Drop for ScopeGuard<'_, '_, '_, '_> {
/// Removes the innermost scope; the popped value is discarded.
fn drop(&mut self) {
self.ctx.scopes.pop();
}
}
impl<'id, 'ir, 'ctx> ScopeGuard<'_, 'ctx, 'id, 'ir> {
/// Gives access to the guarded context for as long as the guard is borrowed.
fn as_ctx(&mut self) -> &mut DowngradeCtx<'ctx, 'id, 'ir> {
self.ctx
}
}
/// An IR tree bundled with the bump arena it was allocated in.
struct OwnedIr {
/// Arena backing `ir`; never read, held only to keep the allocation alive.
_bump: Bump,
/// Root of the tree. The `'static` lifetime is a placeholder: the reference
/// is only handed out through `as_ref`, which ties it to `&self`.
ir: RawIrRef<'static>,
}
impl OwnedIr {
/// Bundles `ir` with `bump`, erasing the borrow lifetime of `ir`.
///
/// # Safety
/// `ir` must point into memory owned by `bump`, so that the erased reference
/// stays valid for as long as the returned value (and thus `bump`) is alive.
///
/// NOTE(review): no caller is visible in this file; `Runtime::downgrade`
/// builds the struct literal directly.
unsafe fn new(ir: RawIrRef<'_>, bump: Bump) -> Self {
Self {
_bump: bump,
// SAFETY: guaranteed by the caller, see the `# Safety` section above.
ir: unsafe { std::mem::transmute::<RawIrRef<'_>, RawIrRef<'static>>(ir) }
}
}
/// Returns the root reference with its lifetime shortened to the borrow of `self`.
fn as_ref(&self) -> RawIrRef<'_> {
self.ir
}
}
/// Code-generation hooks: the bytecode compiler interns strings, registers
/// spans and appends code through this implementation.
impl BytecodeContext for Runtime {
    /// Interns `s` in the runtime's interner and returns its id.
    fn intern_string(&mut self, s: &str) -> StringId {
        StringId(self.strings.get_or_intern(s))
    }

    /// Records `range` as belonging to the current (most recently registered)
    /// source and returns the id of the new span.
    ///
    /// # Panics
    /// Panics if no source has been registered yet, or if the number of spans
    /// no longer fits the `u32` id space (instead of silently truncating the id).
    fn register_span(&mut self, range: TextRange) -> u32 {
        let source_id = self
            .sources
            .len()
            .checked_sub(1)
            .expect("current_source not set");
        // Validate the id before pushing so a failure leaves `spans` untouched.
        let id = u32::try_from(self.spans.len()).expect("span id overflow");
        self.spans.push((source_id, range));
        id
    }

    /// Returns the bytecode emitted so far.
    fn get_code(&self) -> &[u8] {
        &self.bytecode
    }

    /// Returns the bytecode buffer for appending or patching.
    fn get_code_mut(&mut self) -> &mut Vec<u8> {
        &mut self.bytecode
    }
}