// NOTE(review): many generic argument lists in this file appear to have been
// stripped by an HTML-like filter (e.g. `Option` where `Option<TextRange>` is
// clearly intended, `HashMap>>`, `Result`, and the missing `<F, R>` on the
// `with_*_scope` methods). Generics whose arguments begin with a lifetime
// (`IrRef<'id, 'ir>`, `GhostToken<'id>`, ...) survived intact. The stripped
// parameters must be restored from version control before this compiles; the
// tokens below are left exactly as found, with the likely originals noted.

use std::cell::UnsafeCell;
use std::hash::BuildHasher;
use std::path::Path;

use bumpalo::Bump;
use ghost_cell::{GhostCell, GhostToken};
use hashbrown::{DefaultHashBuilder, HashMap, HashSet, HashTable};
use rnix::TextRange;
use string_interner::DefaultStringInterner;

use crate::bytecode::{self, Bytecode, BytecodeContext, Constant};
use crate::disassembler::{Disassembler, DisassemblerContext};
use crate::downgrade::*;
use crate::error::{Error, Result, Source};
use crate::ir::{ArgId, Ir, IrKey, IrRef, RawIrRef, SymId, ThunkId, ir_content_eq};
use crate::store::{DaemonStore, Store, StoreConfig};
use crate::value::{Symbol, Value};

/// Extracts the source span from an `rnix` parse error, when the error
/// variant carries one.
///
/// Returns `None` for variants that have no associated range.
// NOTE(review): return type was presumably `Option<TextRange>` — generic
// argument stripped; `Some(*range)` copies a `TextRange`.
fn parse_error_span(error: &rnix::ParseError) -> Option {
    use rnix::ParseError::*;
    match error {
        // All span-carrying variants funnel into one arm.
        Unexpected(range)
        | UnexpectedExtra(range)
        | UnexpectedWanted(_, range, _)
        | UnexpectedDoubleBind(range)
        | DuplicatedArgs(range, _) => Some(*range),
        _ => None,
    }
}

/// Converts the first span-carrying parse error (if any) into a crate
/// [`Error`] annotated with `source` and the error's span.
///
/// Errors without a span are skipped. Returns `None` when no reportable
/// error was found.
// NOTE(review): the iterator item type and the `Option>` return were
// presumably `impl IntoIterator<Item = &'a rnix::ParseError>` and
// `Option<Error<...>>` — confirm against history. Also note `source` is
// moved by `.with_source(source)` inside the loop; this only borrow-checks
// if `Source` is `Copy` or the original code differed — TODO confirm.
fn handle_parse_error<'a>(
    errors: impl IntoIterator,
    source: Source,
) -> Option> {
    for err in errors {
        if let Some(span) = parse_error_span(err) {
            return Some(
                Error::parse_error(err.to_string())
                    .with_source(source)
                    .with_span(span),
            );
        }
    }
    None
}

/// Public evaluation entry points. The `eval*` family and `add_binding` are
/// unimplemented stubs (`todo!()`); only disassembly is functional here.
impl Context {
    /// Evaluate a source expression. Not yet implemented.
    // NOTE(review): return type was presumably `Result<Value>` (stripped).
    pub fn eval(&mut self, _source: Source) -> Result {
        todo!()
    }

    /// Evaluate without forcing nested thunks. Not yet implemented.
    pub fn eval_shallow(&mut self, _source: Source) -> Result {
        todo!()
    }

    /// Evaluate, forcing the result deeply. Not yet implemented.
    pub fn eval_deep(&mut self, _source: Source) -> Result {
        todo!()
    }

    /// Evaluate in a REPL session with extra in-scope bindings.
    /// Not yet implemented.
    // NOTE(review): `_scope` was presumably `&'a HashSet<SymId>` (stripped).
    pub fn eval_repl<'a>(&'a mut self, _source: Source, _scope: &'a HashSet) -> Result {
        todo!()
    }

    /// Renders `bytecode` as plain-text disassembly, resolving interned
    /// ids through `self` (via the `DisassemblerContext` impl below).
    pub fn disassemble(&self, bytecode: &Bytecode) -> String {
        Disassembler::new(bytecode, self).disassemble()
    }

    /// Like [`Self::disassemble`], but with terminal coloring.
    pub fn disassemble_colored(&self, bytecode: &Bytecode) -> String {
        Disassembler::new(bytecode, self).disassemble_colored()
    }

    /// Adds a named REPL binding, updating `scope`. Not yet implemented.
    pub fn add_binding<'a>(
        &'a mut self,
        name: &str,
        expr: &str,
        scope: &'a mut HashSet,
    ) -> Result {
        todo!()
    }
}

/// Long-lived evaluator state: symbol interner, global scope, source stack,
/// store connection, and the string/constant pools shared by compiled
/// bytecode.
// NOTE(review): several field types below lost their generic arguments
// (`HashMap>>`, `Vec,`, `UnsafeCell>`, ...). From usage elsewhere in the
// file: `global` maps `SymId` to leaf `Ir` nodes, `sources` is a
// `Vec<Source>`, `spans` is an `UnsafeCell<Vec<(usize, TextRange)>>`
// (see `register_span`), `global_strings` is `Vec<String>`,
// `global_constants` is `Vec<Constant>`, and the two maps key `String` /
// `Constant` to `u32` pool indices. Confirm against history.
pub struct Context {
    symbols: DefaultStringInterner,
    global: HashMap>>,
    sources: Vec,
    store: DaemonStore,
    // Interior mutability: `register_span` takes `&self` but appends here.
    spans: UnsafeCell>,
    // Monotonic counter handed out as `ThunkId`s during downgrade.
    thunk_count: usize,
    // String pool + reverse index for `BytecodeContext::intern_string`.
    global_strings: Vec,
    global_string_map: HashMap,
    // Constant pool + reverse index for `BytecodeContext::intern_constant`.
    global_constants: Vec,
    global_constant_map: HashMap,
    // High-water marks of pool entries already pushed elsewhere; only
    // declared here — no visible reader/writer in this file.
    synced_strings: usize,
    synced_constants: usize,
}

/// Owns the bump allocator and a read-only reference into it.
///
/// # Safety
/// The `ir` field points into `_bump`'s storage. We use `'static` as a sentinel
/// lifetime because the struct owns the backing memory. The `as_ref` method
/// re-binds the lifetime to `&self`, preventing use-after-free.
struct OwnedIr {
    _bump: Bump,
    ir: RawIrRef<'static>,
}

impl OwnedIr {
    /// Re-borrows the owned IR with a lifetime tied to `&self`, so callers
    /// cannot outlive the backing `Bump`.
    fn as_ref(&self) -> RawIrRef<'_> {
        self.ir
    }
}

impl Context {
    /// Builds a fresh evaluation context: seeds the global scope with
    /// `builtins`, the free global builtins, and the `true`/`false`/`null`
    /// constants, then connects to the store daemon.
    ///
    /// # Errors
    /// Fails if the daemon connection (`DaemonStore::connect`) fails.
    // NOTE(review): return type was presumably `Result<Self>` (stripped).
    pub fn new() -> Result {
        let mut symbols = DefaultStringInterner::new();
        let mut global = HashMap::new();
        let builtins_sym = symbols.get_or_intern("builtins");
        global.insert(builtins_sym, Ir::Builtins);
        // Builtins that are also reachable as bare globals (Nix semantics).
        let free_globals = [
            "abort",
            "baseNameOf",
            "break",
            "dirOf",
            "derivation",
            "derivationStrict",
            "fetchGit",
            "fetchMercurial",
            "fetchTarball",
            "fetchTree",
            "fromTOML",
            "import",
            "isNull",
            "map",
            "placeholder",
            "removeAttrs",
            "scopedImport",
            "throw",
            "toString",
        ];
        // Literal constants exposed as globals.
        let consts = [
            ("true", Ir::Bool(true)),
            ("false", Ir::Bool(false)),
            ("null", Ir::Null),
        ];
        for name in free_globals {
            let name = symbols.get_or_intern(name);
            let value = Ir::Builtin(name);
            global.insert(name, value);
        }
        for (name, value) in consts {
            let name = symbols.get_or_intern(name);
            global.insert(name, value);
        }
        let config = StoreConfig::from_env();
        let store = DaemonStore::connect(&config.daemon_socket)?;
        Ok(Self {
            symbols,
            global,
            sources: Vec::new(),
            store,
            spans: UnsafeCell::new(Vec::new()),
            thunk_count: 0,
            global_strings: Vec::new(),
            global_string_map: HashMap::new(),
            global_constants: Vec::new(),
            global_constant_map: HashMap::new(),
            synced_strings: 0,
            synced_constants: 0,
        })
    }

    /// Assembles a [`DowngradeCtx`] borrowing this context's interner,
    /// global scope, and thunk counter, scoped to the current source.
    ///
    /// Requires a current source (panics otherwise, via
    /// `get_current_source`).
    // NOTE(review): `extra_scope` was presumably `Option<Scope<'ctx>>`
    // (stripped) — it is chained onto the scope stack in `DowngradeCtx::new`.
    fn downgrade_ctx<'ctx, 'id, 'ir>(
        &'ctx mut self,
        bump: &'ir Bump,
        token: GhostToken<'id>,
        extra_scope: Option>,
    ) -> DowngradeCtx<'ctx, 'id, 'ir> {
        let source = self.get_current_source();
        DowngradeCtx::new(
            bump,
            token,
            &mut self.symbols,
            &self.global,
            extra_scope,
            &mut self.thunk_count,
            source,
        )
    }

    /// Directory of the most recently pushed source.
    ///
    /// # Panics
    /// Panics if no source has been pushed yet.
    fn get_current_dir(&self) -> &Path {
        self.sources
            .last()
            .as_ref()
            .expect("current_source is not set")
            .get_dir()
    }

    /// Clone of the most recently pushed source.
    ///
    /// # Panics
    /// Panics if no source has been pushed yet.
    fn get_current_source(&self) -> Source {
        self.sources
            .last()
            .expect("current_source is not set")
            .clone()
    }

    /// Parses `source` and lowers ("downgrades") it to bump-allocated IR.
    ///
    /// Pushes `source` onto the source stack (note: it is not popped here),
    /// surfaces the first span-carrying parse error, then runs the
    /// downgrade inside a fresh `GhostToken` brand.
    ///
    /// # Errors
    /// Parse errors and downgrade errors.
    // NOTE(review): return type was presumably `Result<OwnedIr>` (stripped).
    fn downgrade<'ctx>(
        &'ctx mut self,
        source: Source,
        extra_scope: Option>,
    ) -> Result {
        tracing::debug!("Parsing Nix expression");
        self.sources.push(source.clone());
        let root = rnix::Root::parse(&source.src);
        // `Some(err)` becomes `Err(err)`, `None` becomes `Ok(())`.
        handle_parse_error(root.errors(), source).map_or(Ok(()), Err)?;
        tracing::debug!("Downgrading Nix expression");
        let expr = root
            .tree()
            .expr()
            .ok_or_else(|| Error::parse_error("unexpected EOF".into()))?;
        let bump = Bump::new();
        GhostToken::new(|token| {
            let ir = self
                .downgrade_ctx(&bump, token, extra_scope)
                .downgrade_toplevel(expr)?;
            // SAFETY-by-construction (see `OwnedIr` docs): the 'static here
            // is a sentinel; `OwnedIr` keeps `bump` alive and `as_ref`
            // re-binds the lifetime to the borrow.
            // NOTE(review): the transmute's source type parameter was
            // stripped — presumably `RawIrRef<'_>`/`RawIrRef<'ir>`.
            let ir = unsafe { std::mem::transmute::, RawIrRef<'static>>(ir) };
            Ok(OwnedIr { _bump: bump, ir })
        })
    }

    /// Full pipeline: parse + downgrade `source`, then compile the IR to
    /// bytecode.
    ///
    /// # Errors
    /// Propagates parse/downgrade errors from [`Self::downgrade`].
    // NOTE(review): return type was presumably `Result<Bytecode>` (stripped).
    pub fn compile_bytecode(&mut self, source: Source) -> Result {
        let root = self.downgrade(source, None)?;
        tracing::debug!("Generating bytecode");
        let bytecode = bytecode::compile_bytecode(root.as_ref(), self);
        tracing::debug!("Compiled bytecode: {:#04X?}", bytecode.code);
        Ok(bytecode)
    }

    /// Store directory path, as reported by the connected daemon store.
    pub fn get_store_dir(&self) -> &str {
        self.store.get_store_dir()
    }
}

/// Pool/interning services the bytecode compiler needs from the context.
impl BytecodeContext for Context {
    /// Interns `s` in the global string pool, returning its stable index.
    /// Idempotent: repeated calls with the same string return the same id.
    fn intern_string(&mut self, s: &str) -> u32 {
        if let Some(&idx) = self.global_string_map.get(s) {
            return idx;
        }
        let idx = self.global_strings.len() as u32;
        self.global_strings.push(s.to_string());
        self.global_string_map.insert(s.to_string(), idx);
        idx
    }

    /// Interns `c` in the global constant pool, returning its stable index.
    /// Idempotent, keyed on constant equality.
    fn intern_constant(&mut self, c: Constant) -> u32 {
        if let Some(&idx) = self.global_constant_map.get(&c) {
            return idx;
        }
        let idx = self.global_constants.len() as u32;
        self.global_constants.push(c.clone());
        self.global_constant_map.insert(c, idx);
        idx
    }

    /// Records `range` against the current source, returning a span id.
    ///
    /// # Panics
    /// Panics if no source has been pushed.
    // FIXME: SAFETY — this mutates through `&self` via `UnsafeCell`. It is
    // sound only if `register_span` is never called re-entrantly or from
    // multiple threads while another `spans` reference is live; that
    // invariant is not enforced or documented here — needs a real
    // SAFETY argument (or a RefCell/Mutex).
    fn register_span(&self, range: TextRange) -> u32 {
        let spans = unsafe { &mut *self.spans.get() };
        let id = spans.len();
        // Index of the current (last-pushed) source.
        let source_id = self
            .sources
            .len()
            .checked_sub(1)
            .expect("current_source not set");
        spans.push((source_id, range));
        id as u32
    }

    /// Resolves an interned symbol id back to its string.
    ///
    /// # Panics
    /// Panics if `id` was not produced by this context's interner.
    fn get_sym(&self, id: SymId) -> &str {
        self.symbols.resolve(id).expect("SymId out of bounds")
    }

    /// Delegates to the inherent `Context::get_current_dir`.
    fn get_current_dir(&self) -> &Path {
        Context::get_current_dir(self)
    }
}

/// Read-only pool lookups used while disassembling bytecode.
impl DisassemblerContext for Context {
    /// # Panics
    /// Panics if `id` is not a valid string-pool index.
    fn lookup_string(&self, id: u32) -> &str {
        self.global_strings
            .get(id as usize)
            .expect("string not found")
    }

    /// # Panics
    /// Panics if `id` is not a valid constant-pool index.
    fn lookup_constant(&self, id: u32) -> &Constant {
        self.global_constants
            .get(id as usize)
            .expect("constant not found")
    }
}

/// One frame of the lexical scope stack walked by `lookup` (innermost last).
// NOTE(review): the `Global`, `Repl`, `ScopedImport`, and `Let` payload
// generics were stripped. From `lookup`'s usage: `Repl`/`ScopedImport` hold
// sets of `SymId`, and `Let` maps `SymId -> ThunkId`.
enum Scope<'ctx> {
    // Top-level builtins/constants (leaf IR nodes only — see `lookup`).
    Global(&'ctx HashMap>>),
    // Names bound interactively in a REPL session.
    Repl(&'ctx HashSet),
    // Names injected by `scopedImport`.
    ScopedImport(HashSet),
    // `let`-bound names, each resolving to a pre-allocated thunk.
    Let(HashMap),
    // A single lambda parameter.
    Param(SymId, ArgId),
}

/// RAII guard that pops the innermost scope frame on drop, so `with_*_scope`
/// helpers cannot leak a frame even if the callback returns early via `?`.
struct ScopeGuard<'a, 'ctx, 'id, 'ir> {
    ctx: &'a mut DowngradeCtx<'ctx, 'id, 'ir>,
}

impl Drop for ScopeGuard<'_, '_, '_, '_> {
    fn drop(&mut self) {
        self.ctx.scopes.pop();
    }
}

impl<'id, 'ir, 'ctx> ScopeGuard<'_, 'ctx, 'id, 'ir> {
    /// Reborrows the guarded context for use inside the scope.
    fn as_ctx(&mut self) -> &mut DowngradeCtx<'ctx, 'id, 'ir> {
        self.ctx
    }
}

/// Thunks created within one scope: the ordered bindings plus a
/// content-addressed cache used to deduplicate structurally-equal IR.
struct ThunkScope<'id, 'ir> {
    // Insertion-ordered (ThunkId, body) pairs, bump-allocated.
    bindings: bumpalo::collections::Vec<'ir, (ThunkId, IrRef<'id, 'ir>)>,
    // Raw hash table keyed by IR content hash (see `lookup_cache`).
    cache: HashTable<(IrRef<'id, 'ir>, ThunkId)>,
    hasher: DefaultHashBuilder,
}

impl<'id, 'ir> ThunkScope<'id, 'ir> {
    /// Empty scope whose bindings live in `bump`.
    fn new_in(bump: &'ir Bump) -> Self {
        Self {
            bindings: bumpalo::collections::Vec::new_in(bump),
            cache: HashTable::new(),
            hasher: DefaultHashBuilder::default(),
        }
    }

    /// Finds an existing thunk whose IR is content-equal to `key`.
    /// The `GhostToken` is needed to read through the `GhostCell`s.
    // NOTE(review): return type was presumably `Option<ThunkId>` (stripped).
    fn lookup_cache(&self, key: IrRef<'id, 'ir>, token: &GhostToken<'id>) -> Option {
        let hash = self.hasher.hash_one(IrKey(key, token));
        self.cache
            .find(hash, |&(ir, _)| ir_content_eq(key, ir, token))
            .map(|&(_, id)| id)
    }

    /// Records a new thunk binding and indexes it in the dedup cache.
    /// Caller must ensure `ir` is not already cached (uses `insert_unique`).
    fn add_binding(&mut self, id: ThunkId, ir: IrRef<'id, 'ir>, token: &GhostToken<'id>) {
        self.bindings.push((id, ir));
        let hash = self.hasher.hash_one(IrKey(ir, token));
        // Re-hash closure lets the table rehash entries on resize.
        self.cache.insert_unique(hash, (ir, id), |&(ir, _)| {
            self.hasher.hash_one(IrKey(ir, token))
        });
    }

    /// Bulk-appends bindings WITHOUT touching the dedup cache — used for
    /// `let` thunks whose ids were pre-allocated in `with_let_scope`.
    // NOTE(review): parameter was presumably
    // `impl IntoIterator<Item = (ThunkId, IrRef<'id, 'ir>)>` (stripped).
    fn extend_bindings(&mut self, iter: impl IntoIterator)>) {
        self.bindings.extend(iter);
    }
}

/// Mutable state threaded through a single downgrade (AST -> IR) pass.
struct DowngradeCtx<'ctx, 'id, 'ir> {
    // Arena all IR nodes are allocated in.
    bump: &'ir Bump,
    // GhostCell brand token for this pass.
    token: GhostToken<'id>,
    symbols: &'ctx mut DefaultStringInterner,
    source: Source,
    // Lexical scope stack, innermost last (see `lookup`).
    scopes: Vec>,
    // Depth of enclosing `with` expressions; >0 enables `WithLookup`.
    with_scope_count: usize,
    // Next fresh `ArgId`.
    arg_count: usize,
    // Shared counter in `Context`; next fresh `ThunkId`.
    thunk_count: &'ctx mut usize,
    // Stack of thunk scopes; the last one collects new thunks.
    thunk_scopes: Vec>,
}

/// Whether `ir` is worth wrapping in a thunk: leaf/already-thunked nodes
/// (builtins, literals, existing thunks) are cheap and are left as-is.
fn should_thunk<'id>(ir: IrRef<'id, '_>, token: &GhostToken<'id>) -> bool {
    !matches!(
        ir.borrow(token),
        Ir::Builtin(_)
            | Ir::Builtins
            | Ir::Int(_)
            | Ir::Float(_)
            | Ir::Bool(_)
            | Ir::Null
            | Ir::Str(_)
            | Ir::Thunk(_)
    )
}

impl<'ctx, 'id, 'ir> DowngradeCtx<'ctx, 'id, 'ir> {
    /// Creates a downgrade context whose scope stack starts with the global
    /// scope, optionally followed by `extra_scope` (e.g. REPL bindings),
    /// and with one root thunk scope.
    fn new(
        bump: &'ir Bump,
        token: GhostToken<'id>,
        symbols: &'ctx mut DefaultStringInterner,
        global: &'ctx HashMap>>,
        extra_scope: Option>,
        thunk_count: &'ctx mut usize,
        source: Source,
    ) -> Self {
        Self {
            bump,
            token,
            symbols,
            source,
            scopes: std::iter::once(Scope::Global(global))
                .chain(extra_scope)
                .collect(),
            thunk_count,
            arg_count: 0,
            with_scope_count: 0,
            thunk_scopes: vec![ThunkScope::new_in(bump)],
        }
    }
}

/// The services the shared downgrade machinery (`crate::downgrade`) needs:
/// node allocation, symbol interning, name resolution, and scope management.
impl<'ctx: 'ir, 'id, 'ir> DowngradeContext<'id, 'ir> for DowngradeCtx<'ctx, 'id, 'ir> {
    /// Allocates a new IR node in the bump arena, wrapped in a `GhostCell`.
    fn new_expr(&self, expr: Ir<'ir, IrRef<'id, 'ir>>) -> IrRef<'id, 'ir> {
        IrRef::new(self.bump.alloc(GhostCell::new(expr)))
    }

    /// Hands out the next fresh argument id.
    fn new_arg(&mut self) -> ArgId {
        self.arg_count += 1;
        ArgId(self.arg_count - 1)
    }

    /// Wraps `ir` in a thunk unless it is a cheap leaf node
    /// (see [`should_thunk`]). Structurally-equal IR within the current
    /// thunk scope is deduplicated to a single `ThunkId`.
    fn maybe_thunk(&mut self, ir: IrRef<'id, 'ir>) -> IrRef<'id, 'ir> {
        if !should_thunk(ir, &self.token) {
            return ir;
        }
        // Reuse an existing thunk for content-equal IR, if cached.
        let cached = self
            .thunk_scopes
            .last()
            .expect("no active cache scope")
            .lookup_cache(ir, &self.token);
        if let Some(id) = cached {
            return IrRef::alloc(self.bump, Ir::Thunk(id));
        }
        // Otherwise mint a fresh id and register the binding.
        let id = ThunkId(*self.thunk_count);
        *self.thunk_count = self.thunk_count.checked_add(1).expect("thunk id overflow");
        self.thunk_scopes
            .last_mut()
            .expect("no active cache scope")
            .add_binding(id, ir, &self.token);
        IrRef::alloc(self.bump, Ir::Thunk(id))
    }

    /// Interns `sym`, returning its id.
    fn new_sym(&mut self, sym: String) -> SymId {
        self.symbols.get_or_intern(sym)
    }

    /// Resolves `id` back to a symbol.
    ///
    /// # Panics
    /// Panics if `id` is unknown to the interner.
    fn get_sym(&self, id: SymId) -> Symbol<'_> {
        self.symbols.resolve(id).expect("no symbol found").into()
    }

    /// Resolves `sym` against the scope stack, innermost scope first,
    /// producing the IR node that reads the binding (`Arg`, `Thunk`,
    /// `ReplBinding`, `ScopedImportBinding`, or a copied global leaf).
    ///
    /// Falls back to a dynamic `WithLookup` when inside at least one
    /// `with` scope.
    ///
    /// # Errors
    /// "not found" downgrade error when the name resolves nowhere and no
    /// `with` scope is active.
    // NOTE(review): return type was presumably
    // `Result<IrRef<'id, 'ir>>` (stripped).
    fn lookup(&self, sym: SymId, span: TextRange) -> Result> {
        for scope in self.scopes.iter().rev() {
            match scope {
                &Scope::Global(global_scope) => {
                    if let Some(expr) = global_scope.get(&sym) {
                        // Globals hold only leaf nodes, so a shallow copy
                        // into a fresh arena node is safe.
                        let ir = match expr {
                            Ir::Builtins => Ir::Builtins,
                            Ir::Builtin(s) => Ir::Builtin(*s),
                            Ir::Bool(b) => Ir::Bool(*b),
                            Ir::Null => Ir::Null,
                            _ => unreachable!("globals should only contain leaf IR nodes"),
                        };
                        return Ok(self.new_expr(ir));
                    }
                }
                &Scope::Repl(repl_bindings) => {
                    if repl_bindings.contains(&sym) {
                        return Ok(self.new_expr(Ir::ReplBinding(sym)));
                    }
                }
                Scope::ScopedImport(scoped_bindings) => {
                    if scoped_bindings.contains(&sym) {
                        return Ok(self.new_expr(Ir::ScopedImportBinding(sym)));
                    }
                }
                Scope::Let(let_scope) => {
                    if let Some(&expr) = let_scope.get(&sym) {
                        return Ok(self.new_expr(Ir::Thunk(expr)));
                    }
                }
                &Scope::Param(param_sym, id) => {
                    if param_sym == sym {
                        return Ok(self.new_expr(Ir::Arg(id)));
                    }
                }
            }
        }
        if self.with_scope_count > 0 {
            // Unresolved statically — defer to runtime `with` lookup.
            Ok(self.new_expr(Ir::WithLookup(sym)))
        } else {
            Err(Error::downgrade_error(
                format!("'{}' not found", self.get_sym(sym)),
                self.get_current_source(),
                span,
            ))
        }
    }

    /// The source this downgrade pass was created for.
    fn get_current_source(&self) -> Source {
        self.source.clone()
    }

    /// Runs `f` with a `let` scope binding each of `keys` to a
    /// pre-allocated consecutive `ThunkId`. `f` must return one IR value
    /// per key (asserted), which are then registered as thunk bindings in
    /// the current thunk scope. The scope frame is popped by `ScopeGuard`
    /// even if `f` errors.
    // NOTE(review): the `<F, R>` generic parameter list and the `Result<R>`
    // return generics were stripped.
    fn with_let_scope(&mut self, keys: &[SymId], f: F) -> Result
    where
        F: FnOnce(&mut Self) -> Result<(bumpalo::collections::Vec<'ir, IrRef<'id, 'ir>>, R)>,
    {
        // Reserve `keys.len()` consecutive thunk ids starting at `base`.
        let base = *self.thunk_count;
        *self.thunk_count = self
            .thunk_count
            .checked_add(keys.len())
            .expect("thunk id overflow");
        let iter = keys.iter().enumerate().map(|(offset, &key)| {
            (
                key,
                // SAFETY: `base + offset < base + keys.len()`, which
                // `checked_add` above proved does not overflow.
                ThunkId(unsafe { base.checked_add(offset).unwrap_unchecked() }),
            )
        });
        self.scopes.push(Scope::Let(iter.collect()));
        let (vals, ret) = {
            // Guard pops the Let frame on all exit paths, including `?`.
            let mut guard = ScopeGuard { ctx: self };
            f(guard.as_ctx())?
        };
        assert_eq!(keys.len(), vals.len());
        let scope = self.thunk_scopes.last_mut().expect("no active thunk scope");
        scope.extend_bindings((base..base + keys.len()).map(ThunkId).zip(vals));
        Ok(ret)
    }

    /// Runs `f` with `param` bound to `arg`; the frame is popped on drop.
    // NOTE(review): `<F, R>` generic parameter list stripped.
    fn with_param_scope(&mut self, param: SymId, arg: ArgId, f: F) -> R
    where
        F: FnOnce(&mut Self) -> R,
    {
        self.scopes.push(Scope::Param(param, arg));
        let mut guard = ScopeGuard { ctx: self };
        f(guard.as_ctx())
    }

    /// Runs `f` with the `with`-depth counter incremented, enabling
    /// `WithLookup` fallback in `lookup`.
    // NOTE(review): `<F, R>` generic parameter list stripped. The decrement
    // is skipped if `f` panics; fine unless panics are caught upstream.
    fn with_with_scope(&mut self, f: F) -> R
    where
        F: FnOnce(&mut Self) -> R,
    {
        self.with_scope_count += 1;
        let ret = f(self);
        self.with_scope_count -= 1;
        ret
    }

    /// Runs `f` with a fresh thunk scope and returns its result together
    /// with all thunk bindings created inside it.
    // NOTE(review): `<F, R>` generic parameter list stripped.
    fn with_thunk_scope(
        &mut self,
        f: F,
    ) -> (
        R,
        bumpalo::collections::Vec<'ir, (ThunkId, IrRef<'id, 'ir>)>,
    )
    where
        F: FnOnce(&mut Self) -> R,
    {
        self.thunk_scopes.push(ThunkScope::new_in(self.bump));
        let ret = f(self);
        (
            ret,
            self.thunk_scopes
                .pop()
                .expect("no thunk scope left???")
                .bindings,
        )
    }

    /// The arena IR nodes are allocated in.
    fn bump(&self) -> &'ir bumpalo::Bump {
        self.bump
    }
}

impl<'id, 'ir, 'ctx: 'ir> DowngradeCtx<'ctx, 'id, 'ir> {
    /// Downgrades a whole expression tree, wraps it with the root thunk
    /// scope's bindings in `Ir::TopLevel`, and freezes the result
    /// (consuming the `GhostToken`).
    ///
    /// # Errors
    /// Propagates downgrade errors from the expression body.
    // NOTE(review): return type generics stripped — presumably
    // `Result<RawIrRef<'ir>>` or similar, given `freeze(self.token)`.
    fn downgrade_toplevel(mut self, root: rnix::ast::Expr) -> Result> {
        let body = root.downgrade(&mut self)?;
        let thunks = self
            .thunk_scopes
            .pop()
            .expect("no thunk scope left???")
            .bindings;
        let ir = IrRef::alloc(self.bump, Ir::TopLevel { body, thunks });
        Ok(ir.freeze(self.token))
    }
}