#![warn(clippy::unwrap_used)]
#![allow(dead_code)]

use bumpalo::Bump;
use fix_builtins::PrimOpPhase;
use fix_codegen::disassembler::{Disassembler, DisassemblerContext};
use fix_codegen::{BytecodeContext, InstructionPtr, Op};
use fix_common::{StringId, Symbol};
use fix_error::{Error, Result, Source};
use fix_ir::downgrade::{Downgrade as _, DowngradeContext};
use fix_ir::{
    GhostMaybeThunkRef, GhostRoIrRef, GhostRoMaybeThunkRef, GhostRoRef, Ir, MaybeThunk, RawIrRef,
    ThunkId,
};
use fix_vm::{ForceMode, StaticValue, Vm, VmCode, VmContext, VmRuntimeCtx};
use ghost_cell::{GhostCell, GhostToken};
use hashbrown::{HashMap, HashSet};
use string_interner::{DefaultStringInterner, Symbol as _};

mod derivation;
pub mod logging;

#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

/// Runtime-side state: the string interner and the constant pool.
pub struct RuntimeState {
    pub strings: DefaultStringInterner,
    pub constants: Constants,
}

/// Compile-side state that accumulates across evaluations.
pub struct CodeState {
    /// Bytecode buffer; starts with the primop dispatch prologue written by
    /// [`Evaluator::new`].
    pub bytecode: Vec,
    /// Every source handed to the compiler, in submission order.
    pub sources: Vec,
    /// Registered spans as `(index into sources, text range)`.
    pub spans: Vec<(usize, rnix::TextRange)>,
    /// Next unused thunk id.
    pub thunk_count: usize,
    /// Global bindings produced by `fix_ir::new_global_env`.
    pub global_env: HashMap,
}

/// Front door of the crate: owns the runtime and code state and drives
/// parse -> downgrade -> bytecode -> VM.
pub struct Evaluator {
    pub runtime: RuntimeState,
    pub code: CodeState,
}

impl Default for Evaluator {
    fn default() -> Self {
        Self::new()
    }
}

impl Evaluator {
    /// Creates an evaluator with a fresh interner, the global environment and
    /// the primop dispatch prologue already emitted into the bytecode buffer.
    pub fn new() -> Self {
        let mut strings = DefaultStringInterner::new();
        let global_env = fix_ir::new_global_env(&mut strings);

        // The prologue holds one `DispatchPrimOp <phase>` pair for every phase
        // from 0 through `PrimOpPhase::Illegal` *inclusive*, so reserve room
        // for `Illegal + 1` pairs.
        let mut bytecode = Vec::with_capacity((PrimOpPhase::Illegal as usize + 1) * 2);
        for phase in 0..=PrimOpPhase::Illegal as u8 {
            bytecode.push(Op::DispatchPrimOp as u8);
            bytecode.push(phase);
        }

        Self {
            runtime: RuntimeState {
                strings,
                constants: Constants::default(),
            },
            code: CodeState {
                sources: Vec::new(),
                spans: Vec::new(),
                thunk_count: 0,
                bytecode,
                global_env,
            },
        }
    }

    /// Evaluates `source` with [`ForceMode::AsIs`].
    pub fn eval(&mut self, source: Source) -> Result {
        self.do_eval(source, None, ForceMode::AsIs)
    }

    /// Evaluates `source` with [`ForceMode::Shallow`].
    pub fn eval_shallow(&mut self, source: Source) -> Result {
        self.do_eval(source, None, ForceMode::Shallow)
    }

    /// Evaluates `source` with [`ForceMode::Deep`].
    pub fn eval_deep(&mut self, source: Source) -> Result {
        self.do_eval(source, None, ForceMode::Deep)
    }

    /// Evaluates `source` with [`ForceMode::Shallow`], additionally resolving
    /// the names in `scope` as REPL bindings.
    pub fn eval_repl(
        &mut self,
        source: Source,
        scope: &HashSet,
    ) -> Result {
        self.do_eval(source, Some(ExtraScope::Repl(scope)), ForceMode::Shallow)
    }

    /// Compiles `source` (with an optional extra scope) and runs the resulting
    /// entry point on the VM using `force_mode`.
    fn do_eval<'ctx>(
        &'ctx mut self,
        source: Source,
        extra_scope: Option>,
        force_mode: ForceMode,
    ) -> Result {
        // The compiler borrows `code` and `runtime` separately; the block ends
        // those borrows before `self` is handed to the VM as a whole.
        let ip = {
            let mut compiler = CompilerCtx {
                code: &mut self.code,
                runtime: &mut self.runtime,
            };
            compiler.compile_bytecode(source, extra_scope)?
        };
        Vm::run(self, ip, force_mode)
    }

    /// Not implemented yet.
    ///
    /// # Panics
    /// Always panics via `todo!`.
    pub fn add_binding(
        &mut self,
        _ident: &str,
        _expr: &str,
        _scope: &mut HashSet,
    ) -> Result {
        todo!("add_binding")
    }

    /// Compiles `source` to bytecode without running it.
    pub fn compile_bytecode(&mut self, source: Source) -> Result {
        let mut compiler = CompilerCtx {
            code: &mut self.code,
            runtime: &mut self.runtime,
        };
        compiler.compile_bytecode(source, None)
    }

    /// Disassembles the bytecode starting at `ip` using the colored output of
    /// [`Disassembler`].
    pub fn disassemble_colored(&self, ip: InstructionPtr) -> String {
        Disassembler::new(ip, self).disassemble_colored()
    }
}

impl VmRuntimeCtx for RuntimeState {
    fn intern_string(&mut self, s: impl AsRef) -> StringId {
        StringId(self.strings.get_or_intern(s))
    }

    /// # Panics
    /// Panics if `id` is unknown to the interner.
    fn resolve_string(&self, id: StringId) -> &str {
        #[allow(clippy::unwrap_used)]
        self.strings.resolve(id.0).unwrap()
    }

    /// # Panics
    /// Panics if `id` is not an index into the constant pool.
    fn get_const(&self, id: u32) -> StaticValue {
        #[allow(clippy::unwrap_used)]
        self.constants.get(id).unwrap()
    }

    fn add_const(&mut self, val: StaticValue) -> u32 {
        self.constants.insert(val)
    }
}

impl VmCode for CodeState {
    fn bytecode(&self) -> &[u8] {
        &self.bytecode
    }

    /// Compiles `source` with no extra scope, appending to this code state.
    fn compile(
        &mut self,
        source: Source,
        runtime: &mut impl VmRuntimeCtx,
    ) -> Result {
        let mut compiler = CompilerCtx {
            code: self,
            runtime,
        };
        compiler.compile_bytecode(source, None)
    }
}

impl VmContext for Evaluator {
    fn split(&mut self) -> (&mut impl VmCode, &mut impl VmRuntimeCtx) {
        (&mut self.code, &mut self.runtime)
    }
}

/// Short-lived view over the code and runtime state used for one compilation.
struct CompilerCtx<'a, R: VmRuntimeCtx> {
    code: &'a mut CodeState,
    runtime: &'a mut R,
}

impl<'a, R: VmRuntimeCtx> CompilerCtx<'a, R> {
    /// Parses and downgrades `source`, then emits bytecode for the resulting IR.
    fn compile_bytecode(
        &mut self,
        source: Source,
        extra_scope: Option,
    ) -> Result {
        let root = self.downgrade(source, extra_scope)?;
        let ip = fix_codegen::compile_bytecode(root.as_ref(), self);
        Ok(ip)
    }

    /// Parses `source` with rnix and lowers the expression to IR that owns its
    /// arena.
    ///
    /// `source` is recorded in `code.sources` before parsing, so spans
    /// registered afterwards refer to it.
    fn downgrade(&mut self, source: Source, extra_scope: Option) -> Result {
        tracing::debug!("Parsing Nix expression");
        self.code.sources.push(source.clone());
        let root = rnix::Root::parse(&source.src);
        handle_parse_error(root.errors(), source.clone()).map_or(Ok(()), Err)?;

        tracing::debug!("Downgrading Nix expression");
        let expr = root
            .tree()
            .expr()
            .ok_or_else(|| Error::parse_error("unexpected EOF".into()))?;
        let bump = Bump::new();
        GhostToken::new(|token| {
            let downgrade_ctx = DowngradeCtx::new(
                &bump,
                token,
                self.runtime,
                &self.code.global_env,
                extra_scope.map(Into::into),
                &mut self.code.thunk_count,
                source,
            );
            let ir = downgrade_ctx.downgrade_toplevel(expr)?;
            // SAFETY: `ir` comes from `downgrade_ctx`, which allocates through
            // `&bump`, and `bump` is moved into the `OwnedIr` built right
            // below, so the arena is kept alongside the reference. `'static`
            // is only a placeholder that `OwnedIr::as_ref` narrows again.
            let ir = unsafe { std::mem::transmute::, RawIrRef<'static>>(ir) };
            Ok(OwnedIr { _bump: bump, ir })
        })
    }
}

impl<'a, R: VmRuntimeCtx> BytecodeContext for CompilerCtx<'a, R> {
    fn intern_string(&mut self, s: &str) -> StringId {
        self.runtime.intern_string(s)
    }

    /// Records `range` against the most recently pushed source and returns the
    /// new span's index.
    ///
    /// # Panics
    /// Panics if no source has been pushed yet, or if the span index does not
    /// fit in `u32`.
    fn register_span(&mut self, range: rnix::TextRange) -> u32 {
        // Checked narrowing: an `as` cast would silently wrap and hand out an
        // id that aliases an earlier span.
        let id = u32::try_from(self.code.spans.len()).expect("span id overflow");
        let source_id = self
            .code
            .sources
            .len()
            .checked_sub(1)
            .expect("current_source not set");
        self.code.spans.push((source_id, range));
        id
    }

    fn get_code(&self) -> &[u8] {
        &self.code.bytecode
    }

    fn get_code_mut(&mut self) -> &mut Vec {
        &mut self.code.bytecode
    }

    /// Converts a codegen constant into a [`StaticValue`] and stores it in the
    /// runtime constant pool.
    ///
    /// # Panics
    /// Panics via `todo!` for `Const::Path`.
    fn add_constant(&mut self, val: fix_codegen::Const) -> u32 {
        use fix_codegen::Const::*;
        let val = match val {
            Smi(x) => StaticValue::new_inline(x),
            Float(x) => StaticValue::new_float(x),
            Bool(x) => StaticValue::new_inline(x),
            String(x) => StaticValue::new_inline(x),
            Path(_) => todo!("path value type"),
            PrimOp {
                id,
                arity,
                dispatch_ip,
            } => StaticValue::new_primop(id, arity, dispatch_ip),
            Null => StaticValue::default(),
        };
        self.runtime.add_const(val)
    }
}

/// Constant pool that deduplicates values by their `to_bits` representation.
#[derive(Default)]
pub struct Constants {
    data: Vec,
    dedup: HashMap,
}

impl Constants {
    /// Returns the index of `val`, appending it only if no constant with the
    /// same bits has been inserted before.
    ///
    /// # Panics
    /// Panics if the new index does not fit in `u32`.
    fn insert(&mut self, val: StaticValue) -> u32 {
        let bits = val.to_bits();
        *self.dedup.entry(bits).or_insert_with(|| {
            // Checked narrowing: a wrapped index would make later loads read
            // a different constant.
            let idx = u32::try_from(self.data.len()).expect("constant id overflow");
            self.data.push(val);
            idx
        })
    }

    /// Returns a copy of the constant at `id`, or `None` if out of range.
    fn get(&self, id: u32) -> Option {
        self.data.get(id as usize).copied()
    }
}

/// Extracts the text range from the parse-error variants that carry one.
fn parse_error_span(error: &rnix::ParseError) -> Option {
    use rnix::ParseError::*;
    match error {
        Unexpected(range)
        | UnexpectedExtra(range)
        | UnexpectedWanted(_, range, _)
        | UnexpectedDoubleBind(range)
        | DuplicatedArgs(range, _) => Some(*range),
        _ => None,
    }
}

/// Turns the parser's error list into at most one reportable error.
///
/// The first error that carries a span wins and is reported with that span.
/// If none of the errors has a span, the first error is still reported (with
/// the source attached but no span) instead of being dropped, so input the
/// parser rejected never reaches the downgrade step. Returns `None` only when
/// `errors` is empty.
fn handle_parse_error<'a>(
    errors: impl IntoIterator,
    source: Source,
) -> Option> {
    let mut first_spanless = None;
    for err in errors {
        if let Some(span) = parse_error_span(err) {
            return Some(
                Error::parse_error(err.to_string())
                    .with_source(source)
                    .with_span(span),
            );
        }
        if first_spanless.is_none() {
            first_spanless = Some(err);
        }
    }
    first_spanless.map(|err| Error::parse_error(err.to_string()).with_source(source))
}

/// State carried through one AST -> IR downgrade pass.
struct DowngradeCtx<'ctx, 'id, 'ir, R: VmRuntimeCtx> {
    /// Arena that every IR node of this pass is allocated in.
    bump: &'ir Bump,
    token: GhostToken<'id>,
    runtime: &'ctx mut R,
    source: Source,
    /// Lexical scopes, innermost last.
    scopes: Vec>,
    /// Active `with` namespaces, innermost last.
    with_stack: Vec>,
    arg_count: u32,
    /// Shared thunk-id counter (borrowed from `CodeState`).
    thunk_count: &'ctx mut usize,
    /// Thunk binding lists under construction, innermost last.
    thunk_scopes: Vec>,
}

impl<'ctx, 'id, 'ir, R: VmRuntimeCtx> DowngradeCtx<'ctx, 'id, 'ir, R> {
    /// Builds a context whose scope stack is the global scope followed by the
    /// optional extra scope, with a single root thunk scope.
    fn new(
        bump: &'ir Bump,
        token: GhostToken<'id>,
        runtime: &'ctx mut R,
        global: &'ctx HashMap,
        extra_scope: Option>,
        thunk_count: &'ctx mut usize,
        source: Source,
    ) -> Self {
        Self {
            bump,
            token,
            runtime,
            source,
            scopes: std::iter::once(Scope::Global(global))
                .chain(extra_scope)
                .collect(),
            thunk_count,
            arg_count: 0,
            with_stack: Vec::new(),
            thunk_scopes: vec![ThunkScope::new_in(bump)],
        }
    }
}

impl<'ctx: 'ir, 'id, 'ir, R: VmRuntimeCtx> DowngradeContext<'id, 'ir>
    for DowngradeCtx<'ctx, 'id, 'ir, R>
{
    /// Allocates `expr` in the arena.
    fn new_expr(&self, expr: Ir<'ir, GhostRoRef<'id, 'ir>>) -> GhostRoIrRef<'id, 'ir> {
        self.bump.alloc(GhostCell::new(expr).into())
    }

    /// Wraps `ir` as a `MaybeThunk`.
    ///
    /// Builtins, literals, args, `Builtins` and `Null` are converted to the
    /// matching `MaybeThunk` variant directly; an `Ir::MaybeThunk` is returned
    /// as is. Anything else is assigned a fresh thunk id and registered in the
    /// innermost thunk scope.
    ///
    /// # Panics
    /// Panics on thunk id overflow or if no thunk scope is active.
    fn maybe_thunk(&mut self, ir: GhostRoIrRef<'id, 'ir>) -> GhostRoMaybeThunkRef<'id, 'ir> {
        use MaybeThunk::*;
        let expr = (|| {
            let expr = match *ir.borrow(&self.token) {
                Ir::Builtin(x) => Builtin(x),
                Ir::Int(x) => Int(x),
                Ir::Float(x) => Float(x),
                Ir::Bool(x) => Bool(x),
                Ir::Str(x) => Str(x),
                Ir::Arg { layer } => Arg { layer },
                Ir::Builtins => Builtins,
                Ir::Null => Null,
                Ir::MaybeThunk(thunk) => return Some(thunk),
                _ => return None,
            };
            Some(self.bump.alloc(GhostCell::new(expr).into()))
        })();
        if let Some(thunk) = expr {
            return thunk;
        }
        let id = ThunkId(*self.thunk_count);
        *self.thunk_count = self.thunk_count.checked_add(1).expect("thunk id overflow");
        self.thunk_scopes
            .last_mut()
            .expect("no active cache scope")
            .add_binding(id, ir);
        self.bump.alloc(GhostCell::new(Thunk(id)).into())
    }

    fn intern_string(&mut self, sym: impl AsRef) -> StringId {
        self.runtime.intern_string(sym)
    }

    fn resolve_sym(&self, id: StringId) -> Symbol<'_> {
        self.runtime.resolve_string(id).into()
    }

    /// Resolves `sym` by walking the scope stack from innermost to outermost.
    ///
    /// If no scope binds the name and at least one `with` namespace is active,
    /// a thunk performing an `Ir::WithLookup` over the namespaces (innermost
    /// first) is registered and returned. Otherwise a "not found" downgrade
    /// error is returned for `span`.
    ///
    /// # Panics
    /// Panics if the thunk-scope depth does not fit in `u8`, on thunk id
    /// overflow, or if no thunk scope is active.
    fn lookup(
        &mut self,
        sym: StringId,
        span: rnix::TextRange,
    ) -> Result> {
        for scope in self.scopes.iter().rev() {
            match scope {
                &Scope::Global(global_scope) => {
                    if let Some(expr) = global_scope.get(&sym) {
                        return Ok(expr.into());
                    }
                }
                &Scope::Repl(repl_bindings) => {
                    if repl_bindings.contains(&sym) {
                        return Ok(self
                            .bump
                            .alloc(GhostCell::new(MaybeThunk::ReplBinding(sym)).into()));
                    }
                }
                Scope::ScopedImport(scoped_bindings) => {
                    if scoped_bindings.contains(&sym) {
                        return Ok(self
                            .bump
                            .alloc(GhostCell::new(MaybeThunk::ScopedImportBinding(sym)).into()));
                    }
                }
                Scope::Let(let_scope) => {
                    if let Some(&expr) = let_scope.get(&sym) {
                        return Ok(expr.into());
                    }
                }
                &Scope::Param {
                    sym: param_sym,
                    abs_layer,
                } => {
                    if param_sym == sym {
                        // `abs_layer` is the thunk-scope depth recorded when the
                        // parameter was introduced; the argument layer is the
                        // distance from the current depth to it.
                        let layers: u8 =
                            self.thunk_scopes.len().try_into().expect("scope too deep!");
                        let layer = layers - abs_layer;
                        return Ok(self
                            .bump
                            .alloc(GhostCell::new(MaybeThunk::Arg { layer }).into()));
                    }
                }
            }
        }

        if !self.with_stack.is_empty() {
            let id = ThunkId(*self.thunk_count);
            *self.thunk_count = self.thunk_count.checked_add(1).expect("thunk id overflow");
            let mut namespaces =
                bumpalo::collections::Vec::with_capacity_in(self.with_stack.len(), self.bump);
            namespaces.extend(self.with_stack.iter().rev().copied());
            let body = self
                .bump
                .alloc(GhostCell::new(Ir::WithLookup { sym, namespaces }).into());
            self.thunk_scopes
                .last_mut()
                .expect("no active thunk scope")
                .add_binding(id, body);
            Ok(self
                .bump
                .alloc(GhostCell::new(MaybeThunk::Thunk(id)).into()))
        } else {
            Err(Error::downgrade_error(
                format!("'{}' not found", self.resolve_sym(sym)),
                self.get_current_source(),
                span,
            ))
        }
    }

    fn get_current_source(&self) -> Source {
        self.source.clone()
    }

    /// Runs `f` with a `let` scope binding each of `keys`.
    ///
    /// One thunk id per key is reserved up front and a placeholder handle for
    /// it is put in scope, so the bodies can refer to each other. After `f`
    /// returns the values (one per key, same order), every handle is
    /// overwritten with the corresponding value and registered in the
    /// innermost thunk scope.
    ///
    /// The `let` scope is removed again whether `f` succeeds or fails.
    ///
    /// # Panics
    /// Panics on thunk id overflow, if `f` returns a different number of values
    /// than `keys`, or if no thunk scope is active.
    fn with_let_scope(&mut self, keys: &[StringId], f: F) -> Result
    where
        F: FnOnce(
            &mut Self,
        ) -> Result<(
            bumpalo::collections::Vec<'ir, GhostRoMaybeThunkRef<'id, 'ir>>,
            Ret,
        )>,
    {
        let base = *self.thunk_count;
        *self.thunk_count = self
            .thunk_count
            .checked_add(keys.len())
            .expect("thunk id overflow");
        let handles = (base..base + keys.len())
            .map(|id| {
                &*self
                    .bump
                    .alloc(GhostCell::new(MaybeThunk::Thunk(ThunkId(id))))
            })
            .collect::>();
        let scope = keys.iter().copied().zip(handles.iter().copied()).collect();
        self.scopes.push(Scope::Let(scope));
        // Pop before propagating so the error path does not leave this scope
        // on the stack.
        let outcome = f(self);
        self.scopes.pop();
        let (vals, ret) = outcome?;
        assert_eq!(keys.len(), vals.len());
        let scope = self.thunk_scopes.last_mut().expect("no active thunk scope");
        for (i, (val, handle)) in vals.into_iter().zip(handles).enumerate() {
            let thunk = *val.borrow(&self.token);
            *handle.borrow_mut(&mut self.token) = thunk;
            let id = ThunkId(base + i);
            let ir_ref = self
                .bump
                .alloc(GhostCell::new(Ir::MaybeThunk(handle.into())).into());
            scope.add_binding(id, ir_ref);
        }
        Ok(ret)
    }

    /// Runs `f` with `sym` bound as a function parameter; the scope is removed
    /// by a drop guard when `f` is done.
    ///
    /// # Panics
    /// Panics if the thunk-scope depth does not fit in `u8`.
    fn with_param_scope(&mut self, sym: StringId, f: F) -> Ret
    where
        F: FnOnce(&mut Self) -> Ret,
    {
        self.scopes.push(Scope::Param {
            sym,
            abs_layer: self.thunk_scopes.len().try_into().expect("scope too deep!"),
        });
        let mut guard = ScopeGuard { ctx: self };
        f(guard.as_ctx())
    }

    /// Runs `f` with `namespace` pushed on the `with` stack.
    fn with_with_scope(&mut self, namespace: GhostRoMaybeThunkRef<'id, 'ir>, f: F) -> Ret
    where
        F: FnOnce(&mut Self) -> Ret,
    {
        self.with_stack.push(namespace);
        let ret = f(self);
        self.with_stack.pop();
        ret
    }

    /// Runs `f` inside a fresh thunk scope and returns its result together with
    /// the thunk bindings registered while it ran.
    ///
    /// # Panics
    /// Panics if `u8::MAX` thunk scopes are already active.
    fn with_thunk_scope(
        &mut self,
        f: F,
    ) -> (
        Ret,
        bumpalo::collections::Vec<'ir, (ThunkId, GhostRoIrRef<'id, 'ir>)>,
    )
    where
        F: FnOnce(&mut Self) -> Ret,
    {
        if self.thunk_scopes.len() == u8::MAX as usize {
            panic!("scope too deep!");
        }
        self.thunk_scopes.push(ThunkScope::new_in(self.bump));
        let ret = f(self);
        (
            ret,
            self.thunk_scopes
                .pop()
                .expect("no thunk scope left???")
                .bindings,
        )
    }

    fn bump(&self) -> &'ir bumpalo::Bump {
        self.bump
    }
}

impl<'id, 'ir, 'ctx: 'ir, R: VmRuntimeCtx> DowngradeCtx<'ctx, 'id, 'ir, R> {
    /// Downgrades `root`, wraps it with the root scope's thunks in an
    /// `Ir::TopLevel` node, and freezes the result with the ghost token.
    ///
    /// # Panics
    /// Panics if the root thunk scope is missing.
    fn downgrade_toplevel(mut self, root: rnix::ast::Expr) -> Result> {
        let body = root.downgrade(&mut self)?;
        let thunks = self
            .thunk_scopes
            .pop()
            .expect("no thunk scope left???")
            .bindings;
        Ok(Ir::freeze(
            self.new_expr(Ir::TopLevel { body, thunks }),
            self.token,
        ))
    }
}

/// Arena-backed list of `(thunk id, body)` pairs collected for one scope.
struct ThunkScope<'id, 'ir> {
    bindings: bumpalo::collections::Vec<'ir, (ThunkId, GhostRoIrRef<'id, 'ir>)>,
}

impl<'id, 'ir> ThunkScope<'id, 'ir> {
    fn new_in(bump: &'ir Bump) -> Self {
        Self {
            bindings: bumpalo::collections::Vec::new_in(bump),
        }
    }

    fn add_binding(&mut self, id: ThunkId, ir: GhostRoIrRef<'id, 'ir>) {
        self.bindings.push((id, ir));
    }

    fn extend_bindings(
        &mut self,
        iter: impl IntoIterator)>,
    ) {
        self.bindings.extend(iter);
    }
}

/// One entry of the lexical scope stack consulted by `lookup`.
enum Scope<'ctx, 'id, 'ir> {
    Global(&'ctx HashMap),
    Repl(&'ctx HashSet),
    ScopedImport(HashSet),
    Let(HashMap>),
    /// `abs_layer` is the thunk-scope depth at the time the parameter scope was
    /// entered.
    Param { sym: StringId, abs_layer: u8 },
}

/// Optional scope layered directly on top of the global scope.
enum ExtraScope<'ctx> {
    Repl(&'ctx HashSet),
    ScopedImport(HashSet),
}

impl<'ctx> From> for Scope<'ctx, '_, '_> {
    fn from(value: ExtraScope<'ctx>) -> Self {
        use ExtraScope::*;
        match value {
            ScopedImport(scope) => Scope::ScopedImport(scope),
            Repl(scope) => Scope::Repl(scope),
        }
    }
}

/// Drop guard that pops the innermost lexical scope of the wrapped context.
struct ScopeGuard<'a, 'ctx, 'id, 'ir, R: VmRuntimeCtx> {
    ctx: &'a mut DowngradeCtx<'ctx, 'id, 'ir, R>,
}

impl<'a, 'ctx, 'id, 'ir, R: VmRuntimeCtx> Drop for ScopeGuard<'a, 'ctx, 'id, 'ir, R> {
    fn drop(&mut self) {
        self.ctx.scopes.pop();
    }
}

impl<'a, 'ctx, 'id, 'ir, R: VmRuntimeCtx> ScopeGuard<'a, 'ctx, 'id, 'ir, R> {
    fn as_ctx(&mut self) -> &mut DowngradeCtx<'ctx, 'id, 'ir, R> {
        self.ctx
    }
}

/// IR root bundled with the arena it was allocated in.
struct OwnedIr {
    _bump: Bump,
    ir: RawIrRef<'static>,
}

impl OwnedIr {
    /// # Safety
    /// `ir` must be an allocation backed by `bump`. The reference's
    /// lifetime is extended to `'static` as a placeholder; the stored IR
    /// must only be re-borrowed via [`OwnedIr::as_ref`], which narrows
    /// the lifetime back to that of the `&self` borrow. Moving `bump`
    /// into the struct keeps the underlying allocation live for the
    /// lifetime of the `OwnedIr`.
    unsafe fn new(ir: RawIrRef<'_>, bump: Bump) -> Self {
        Self {
            _bump: bump,
            // SAFETY: see function docs - caller guarantees `ir` is in `bump`,
            // and the `'static` lifetime is a placeholder narrowed by `as_ref`.
            ir: unsafe { std::mem::transmute::, RawIrRef<'static>>(ir) },
        }
    }

    /// Borrows the IR root for as long as `self` is borrowed.
    fn as_ref<'ir>(&'ir self) -> RawIrRef<'ir> {
        // SAFETY: narrows the placeholder `'static` lifetime stored in
        // `self.ir` down to `'ir = &'ir self`. Lifetime shortening is
        // logically sound for covariant positions; the transmute is only
        // needed because `RawRef<'ir>` carries `'ir` through a GAT
        // (`Ref::Ref`), which prevents the compiler from inferring
        // covariance automatically. The bump arena that backs the IR is
        // owned by `self._bump`, so the data is live for at least `'ir`.
        unsafe { std::mem::transmute::, RawIrRef<'ir>>(self.ir) }
    }
}

impl DisassemblerContext for Evaluator {
    fn get_code(&self) -> &[u8] {
        &self.code.bytecode
    }

    /// # Panics
    /// Panics if `id` is not a valid interner symbol.
    #[allow(clippy::unwrap_used)]
    fn resolve_string(&self, id: u32) -> &str {
        let id = string_interner::symbol::SymbolU32::try_from_usize(id as usize).unwrap();
        self.runtime.strings.resolve(id).unwrap()
    }
}