From d98e389606f0bb1bc0daf2d77d5769aa4e9c146a Mon Sep 17 00:00:00 2001
From: imxyy_soope_
Date: Sun, 17 May 2026 17:02:49 +0800
Subject: [PATCH] implement string context

---
Reviewer notes (this section is not part of the commit message):
- The patch text was restored from a flattened copy. Generic arguments such as
  `Box<str>` and `as_gc::<NixString>()` were re-derived from their usage; the
  author e-mail could not be recovered, and the `index` blob ids are the
  original ones, so they no longer match files changed by the points below.
- host.rs: the second hunk starts at the first added line because the context
  line carrying the previous method's return type could not be restored.
- `FromIterator for StringContext` now sorts and deduplicates, since `insert`
  and `merge` depend on that order.
- `appendContext` now type-checks its first argument instead of silently
  turning a non-string into `""`.
- The `NixString` doc comment is kept (and extended) rather than deleted.
- Formatting: two `use` lists and one over-long match arm; `Info` in
  `get_context` derives `Default`; accumulator copies use `clone()`.

 Cargo.toml                            |   2 +-
 fix-abstract-vm/src/host.rs           |  15 +-
 fix-abstract-vm/src/lib.rs            |   2 +
 fix-abstract-vm/src/string_context.rs | 168 ++++++++++
 fix-abstract-vm/src/value.rs          |  28 +-
 fix-builtins/src/lib.rs               |  10 +
 fix-primops/src/context.rs            | 446 ++++++++++++++++++++++++++
 fix-primops/src/lib.rs                |  15 +
 fix-vm/src/instructions/arithmetic.rs |   8 +-
 fix-vm/src/instructions/misc.rs       |  24 +-
 fix/src/string_context.rs             | 209 ------------
 11 files changed, 708 insertions(+), 219 deletions(-)
 create mode 100644 fix-abstract-vm/src/string_context.rs
 create mode 100644 fix-primops/src/context.rs
 delete mode 100644 fix/src/string_context.rs

diff --git a/Cargo.toml b/Cargo.toml
index 54a11d9..04b375d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,7 +29,7 @@ bumpalo = { version = "3.20", features = [
 ghost-cell = "0.2"
 hashbrown = "0.16"
 num_enum = "0.7.5"
-smallvec = "1.15"
+smallvec = { version = "1.15", features = ["const_new", "const_generics"] }
 ere = "0.2"
 string-interner = "0.19"
 rnix = "0.14"
diff --git a/fix-abstract-vm/src/host.rs b/fix-abstract-vm/src/host.rs
index 764169f..f3adf89 100644
--- a/fix-abstract-vm/src/host.rs
+++ b/fix-abstract-vm/src/host.rs
@@ -4,8 +4,8 @@ use fix_error::Source;
 use hashbrown::HashSet;
 
 use crate::{
-    AttrSet, Closure, ExtraScope, List, NixString, NixType, Null, Path, PrimOp, PrimOpApp,
-    StaticValue, StrictValue, Thunk, ThunkState, Value,
+    AttrSet, Closure, ExtraScope, List, NixString, NixType, Null, Path, PrimOp, PrimOpApp,
+    StaticValue, StrictValue, StringContext, Thunk, ThunkState, Value,
 };
 
 pub trait VmContext {
@@ -39,3 +39,6 @@ pub trait VmRuntimeCtxExt: VmRuntimeCtx {
+    /// Returns the string context attached to `val`, or the shared empty
+    /// context if `val` is either a non-string or a string without context.
+    fn get_string_context<'gc>(&self, val: StrictValue<'gc>) -> &'gc StringContext;
     fn convert_value(&self, val: Value) -> fix_common::Value;
 }
 
@@ -73,6 +76,14 @@ impl<T: VmRuntimeCtx> VmRuntimeCtxExt for T {
         }
     }
 
+    fn get_string_context<'gc>(&self, val: StrictValue<'gc>) -> &'gc StringContext {
+        if let Some(ns) = val.as_gc::<NixString>() {
+            ns.as_ref().context()
+        } else {
+            StringContext::empty()
+        }
+    }
+
     fn convert_value(&self, val: Value) -> fix_common::Value {
         self.convert_value_with_seen(val, &mut HashSet::new())
     }
diff --git a/fix-abstract-vm/src/lib.rs b/fix-abstract-vm/src/lib.rs
index 465b01a..a1ea484 100644
--- a/fix-abstract-vm/src/lib.rs
+++ b/fix-abstract-vm/src/lib.rs
@@ -6,6 +6,7 @@ mod machine;
 mod path_util;
 mod resolve;
 mod state;
+mod string_context;
 mod value;
 
 pub use bytecode_reader::*;
@@ -15,4 +16,5 @@ pub use machine::*;
 pub use path_util::*;
 pub use resolve::*;
 pub use state::*;
+pub use string_context::*;
 pub use value::*;
diff --git a/fix-abstract-vm/src/string_context.rs b/fix-abstract-vm/src/string_context.rs
new file mode 100644
index 0000000..71f2d95
--- /dev/null
+++ b/fix-abstract-vm/src/string_context.rs
@@ -0,0 +1,168 @@
+use std::cmp::Ordering;
+
+use smallvec::SmallVec;
+
+/// A string context element
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum StringContextElem {
+    /// Plain store path reference
+    Opaque {
+        path: Box<str>,
+    },
+    /// All outputs of a derivation
+    /// encoded `=<drv_path>`
+    DrvDeep {
+        drv_path: Box<str>,
+    },
+    /// A specific output of a derivation
+    /// encoded `!<output>!<drv_path>`
+    Built {
+        drv_path: Box<str>,
+        output: Box<str>,
+    },
+}
+
+impl StringContextElem {
+    /// Decode the CppNix wire form (`!out!/p`, `=/p`, `/p`). Falls back to
+    /// `Opaque` for malformed `!`-prefixed inputs (matching nix-js).
+    pub fn decode(encoded: &str) -> Self {
+        if let Some(drv_path) = encoded.strip_prefix('=') {
+            Self::DrvDeep {
+                drv_path: drv_path.into(),
+            }
+        } else if let Some(rest) = encoded.strip_prefix('!') {
+            if let Some(second_bang) = rest.find('!') {
+                Self::Built {
+                    output: rest[..second_bang].into(),
+                    drv_path: rest[second_bang + 1..].into(),
+                }
+            } else {
+                Self::Opaque {
+                    path: encoded.into(),
+                }
+            }
+        } else {
+            Self::Opaque {
+                path: encoded.into(),
+            }
+        }
+    }
+
+    pub fn encode(&self) -> String {
+        match self {
+            Self::Opaque { path } => path.to_string(),
+            Self::DrvDeep { drv_path } => format!("={drv_path}"),
+            Self::Built { drv_path, output } => format!("!{output}!{drv_path}"),
+        }
+    }
+}
+
+/// A set of [`StringContextElem`]s, kept sorted and deduplicated (`insert`
+/// and `merge` rely on that ordering).
+#[derive(Debug, Clone, Default)]
+pub struct StringContext {
+    data: SmallVec<[StringContextElem; 1]>,
+}
+
+impl IntoIterator for StringContext {
+    type Item = StringContextElem;
+    type IntoIter = <SmallVec<[StringContextElem; 1]> as IntoIterator>::IntoIter;
+    fn into_iter(self) -> Self::IntoIter {
+        self.data.into_iter()
+    }
+}
+
+impl<'a> IntoIterator for &'a StringContext {
+    type Item = &'a StringContextElem;
+    type IntoIter = <&'a SmallVec<[StringContextElem; 1]> as IntoIterator>::IntoIter;
+    fn into_iter(self) -> Self::IntoIter {
+        self.data.iter()
+    }
+}
+
+impl<'a> IntoIterator for &'a mut StringContext {
+    type Item = &'a mut StringContextElem;
+    type IntoIter = <&'a mut SmallVec<[StringContextElem; 1]> as IntoIterator>::IntoIter;
+    fn into_iter(self) -> Self::IntoIter {
+        self.data.iter_mut()
+    }
+}
+
+impl FromIterator<StringContextElem> for StringContext {
+    /// Collects arbitrary elements, then sorts and deduplicates them so the
+    /// ordering that `insert` and `merge` rely on holds for the result.
+    fn from_iter<T: IntoIterator<Item = StringContextElem>>(iter: T) -> Self {
+        let mut data: SmallVec<[StringContextElem; 1]> = iter.into_iter().collect();
+        data.sort_unstable();
+        data.dedup();
+        Self { data }
+    }
+}
+
+impl StringContext {
+    pub fn empty() -> &'static Self {
+        static EMPTY: StringContext = StringContext {
+            data: SmallVec::new_const(),
+        };
+        &EMPTY
+    }
+
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.data.is_empty()
+    }
+
+    pub fn insert(&mut self, elem: StringContextElem) {
+        match self.data.binary_search(&elem) {
+            Ok(_) => {}
+            Err(pos) => self.data.insert(pos, elem),
+        }
+    }
+
+    pub fn merge(&self, other: &Self) -> Self {
+        if self.data.is_empty() {
+            return other.clone();
+        }
+        if other.data.is_empty() {
+            return self.clone();
+        }
+
+        let a = &self.data;
+        let b = &other.data;
+        let mut out = SmallVec::with_capacity(a.len() + b.len());
+        let (mut i, mut j) = (0, 0);
+        while i < a.len() && j < b.len() {
+            match a[i].cmp(&b[j]) {
+                Ordering::Less => {
+                    out.push(a[i].clone());
+                    i += 1;
+                }
+                Ordering::Greater => {
+                    out.push(b[j].clone());
+                    j += 1;
+                }
+                Ordering::Equal => {
+                    out.push(a[i].clone());
+                    i += 1;
+                    j += 1;
+                }
+            }
+        }
+        out.extend(a[i..].iter().cloned());
+        out.extend(b[j..].iter().cloned());
+        Self { data: out }
+    }
+
+    pub fn iter(&self) -> <&Self as IntoIterator>::IntoIter {
+        self.into_iter()
+    }
+
+    /// Mutable iteration. Callers must preserve the sorted order of the
+    /// elements; `insert` and `merge` depend on it.
+    pub fn iter_mut(&mut self) -> <&mut Self as IntoIterator>::IntoIter {
+        self.into_iter()
+    }
+}
diff --git a/fix-abstract-vm/src/value.rs b/fix-abstract-vm/src/value.rs
index 621c11f..ff7eb3b 100644
--- a/fix-abstract-vm/src/value.rs
+++ b/fix-abstract-vm/src/value.rs
@@ -17,6 +17,7 @@ use string_interner::Symbol;
 use string_interner::symbol::SymbolU32;
 
 use crate::boxing::{RawBox, RawStore, RawTag, Value as RawValue};
+use crate::string_context::StringContext;
 
 mod private {
     pub trait Cealed {}
@@ -420,26 +421,49 @@ impl RawStore for Path {
     }
 }
 
 /// Heap-allocated Nix string.
 ///
 /// Stored on the GC heap via `Gc<'gc, NixString>`. The string data itself
 /// lives in a standard `Box<str>` owned by this struct; the GC only manages
 /// the outer allocation.
+///
+/// `context` records the store paths and derivation outputs the string
+/// refers to.
 #[derive(Collect)]
 #[collect(require_static)]
 pub struct NixString {
     data: Box<str>,
-    // TODO: string context for derivation dependency tracking
+    context: StringContext,
 }
 
 impl NixString {
     pub fn new(s: impl Into<Box<str>>) -> Self {
-        Self { data: s.into() }
+        Self {
+            data: s.into(),
+            context: StringContext::new(),
+        }
+    }
+
+    /// Construct a `NixString` whose `context` is already sorted+deduped.
+    /// The caller is responsible for invariant maintenance.
+    pub fn with_context(s: impl Into<Box<str>>, context: StringContext) -> Self {
+        Self {
+            data: s.into(),
+            context,
+        }
     }
 
     pub fn as_str(&self) -> &str {
         &self.data
     }
+
+    pub fn context(&self) -> &StringContext {
+        &self.context
+    }
+
+    pub fn has_context(&self) -> bool {
+        !self.context.is_empty()
+    }
 }
 
 impl fmt::Debug for NixString {
diff --git a/fix-builtins/src/lib.rs b/fix-builtins/src/lib.rs
index 51bd757..1028078 100644
--- a/fix-builtins/src/lib.rs
+++ b/fix-builtins/src/lib.rs
@@ -118,6 +118,7 @@ define_builtins! {
     ("__tryEval", TryEval, 1),
     ("__typeOf", TypeOf, 1),
     ("__unsafeDiscardStringContext", UnsafeDiscardStringContext, 1),
+    ("__unsafeDiscardOutputDependency", UnsafeDiscardOutputDependency, 1),
     ("__unsafeGetAttrPos", UnsafeGetAttrPos, 2),
     ("__warn", Warn, 2),
     ("__zipAttrsWith", ZipAttrsWith, 2),
@@ -257,6 +258,14 @@ pub enum PrimOpPhase {
     ImportFinalize,
     ScopedImportFinalize,
 
+    AppendContextLoop,
+    AppendContextEntryForced,
+    AppendContextOutputsForced,
+    AppendContextOutputElementLoop,
+    AppendContextOutputElementForced,
+
+    UnsafeDiscardOutputDependency,
+
     Illegal,
 }
 
@@ -375,6 +384,7 @@ impl BuiltinId {
             TryEval => PrimOpPhase::TryEval,
             TypeOf => PrimOpPhase::TypeOf,
             UnsafeDiscardStringContext => PrimOpPhase::UnsafeDiscardStringContext,
+            UnsafeDiscardOutputDependency => PrimOpPhase::UnsafeDiscardOutputDependency,
             UnsafeGetAttrPos => PrimOpPhase::UnsafeGetAttrPos,
             Warn => PrimOpPhase::Warn,
             ZipAttrsWith => PrimOpPhase::ZipAttrsWith,
diff --git a/fix-primops/src/context.rs b/fix-primops/src/context.rs
new file mode 100644
index 0000000..0d40b27
--- /dev/null
+++ b/fix-primops/src/context.rs
@@ -0,0 +1,446 @@
+//! `builtins.hasContext`, `builtins.getContext`, `builtins.appendContext`,
+//! `builtins.unsafeDiscardStringContext`,
+//! `builtins.unsafeDiscardOutputDependency`.
+//!
+//! See `fix-abstract-vm/src/string_context.rs` for the
+//! `StringContextElem` type.
+
+use fix_abstract_vm::{
+    AttrSet, BytecodeReader, List as VmList, Machine, MachineExt, NixString, NixType, Step,
+    StrictValue, StringContext, StringContextElem, Value, VmRuntimeCtx, VmRuntimeCtxExt,
+};
+use fix_builtins::PrimOpPhase;
+use fix_common::StringId;
+use fix_error::Error;
+use gc_arena::{Gc, Mutation};
+use smallvec::SmallVec;
+
+pub fn has_context<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    let val = m.force_and_retry::<StrictValue>(reader, mc)?;
+    if !val.is::<StringId>() && val.as_gc::<NixString>().is_none() {
+        return m.finish_type_err(NixType::String, val.ty());
+    }
+    let has_ctx = !ctx.get_string_context(val).is_empty();
+    m.return_from_primop(Value::new_inline(has_ctx), reader)
+}
+
+pub fn unsafe_discard_string_context<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    let val = m.force_and_retry::<StrictValue>(reader, mc)?;
+    if let Some(sid) = val.as_inline::<StringId>() {
+        return m.return_from_primop(Value::new_inline(sid), reader);
+    }
+    let Some(ns) = val.as_gc::<NixString>() else {
+        return m.finish_type_err(NixType::String, val.ty());
+    };
+    let sid = ctx.intern_string(ns.as_str());
+    m.return_from_primop(Value::new_inline(sid), reader)
+}
+
+pub fn unsafe_discard_output_dependency<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    let val = m.force_and_retry::<StrictValue>(reader, mc)?;
+    if let Some(sid) = val.as_inline::<StringId>() {
+        return m.return_from_primop(Value::new_inline(sid), reader);
+    }
+    let Some(ns) = val.as_gc::<NixString>() else {
+        return m.finish_type_err(NixType::String, val.ty());
+    };
+    if ns.context().is_empty() {
+        let sid = ctx.intern_string(ns.as_str());
+        return m.return_from_primop(Value::new_inline(sid), reader);
+    }
+
+    let mut new_ctx = StringContext::new();
+    for elem in ns.context() {
+        let replacement = match elem {
+            StringContextElem::DrvDeep { drv_path } => StringContextElem::Opaque {
+                path: drv_path.clone(),
+            },
+            other => other.clone(),
+        };
+        new_ctx.insert(replacement);
+    }
+
+    let s: Box<str> = ns.as_str().into();
+    let new_ns = Gc::new(mc, NixString::with_context(s, new_ctx));
+    m.return_from_primop(Value::new_gc(new_ns), reader)
+}
+
+pub fn get_context<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    let val = m.force_and_retry::<StrictValue>(reader, mc)?;
+    if !val.is::<StringId>() && val.as_gc::<NixString>().is_none() {
+        return m.finish_type_err(NixType::String, val.ty());
+    }
+    let elems = ctx.get_string_context(val);
+
+    // Per-path summary of the context elements that mention that path.
+    #[derive(Default)]
+    struct Info {
+        path: bool,
+        all_outputs: bool,
+        outputs: SmallVec<[Box<str>; 2]>,
+    }
+    let mut by_path = std::collections::BTreeMap::<Box<str>, Info>::new();
+    for elem in elems {
+        match elem {
+            StringContextElem::Opaque { path } => {
+                by_path.entry(path.clone()).or_default().path = true;
+            }
+            StringContextElem::DrvDeep { drv_path } => {
+                by_path.entry(drv_path.clone()).or_default().all_outputs = true;
+            }
+            StringContextElem::Built { drv_path, output } => {
+                by_path
+                    .entry(drv_path.clone())
+                    .or_default()
+                    .outputs
+                    .push(output.clone());
+            }
+        }
+    }
+
+    let mut outer_entries: SmallVec<[(StringId, Value<'gc>); 4]> = SmallVec::new();
+    for (path, mut info) in by_path {
+        info.outputs.sort();
+        info.outputs.dedup();
+
+        let mut sub: SmallVec<[(StringId, Value<'gc>); 4]> = SmallVec::new();
+        if info.all_outputs {
+            sub.push((ctx.intern_string("allOutputs"), Value::new_inline(true)));
+        }
+        if !info.outputs.is_empty() {
+            let items: SmallVec<[Value<'gc>; 4]> = info
+                .outputs
+                .iter()
+                .map(|o| Value::new_inline(ctx.intern_string(o)))
+                .collect();
+            let list = VmList::new(mc, items);
+            sub.push((ctx.intern_string("outputs"), Value::new_gc(list)));
+        }
+        if info.path {
+            sub.push((ctx.intern_string("path"), Value::new_inline(true)));
+        }
+        sub.sort_by_key(|(k, _)| *k);
+        let sub_attrs = Gc::new(mc, AttrSet::from_sorted_unchecked(sub));
+
+        outer_entries.push((ctx.intern_string(&path), Value::new_gc(sub_attrs)));
+    }
+    outer_entries.sort_by_key(|(k, _)| *k);
+
+    let outer = Gc::new(mc, AttrSet::from_sorted_unchecked(outer_entries));
+    m.return_from_primop(Value::new_gc(outer), reader)
+}
+
+/// appendContext :: String -> AttrSet -> String
+/// The context AttrSet maps store-path strings to `{ path?: Bool, allOutputs?:
+/// Bool, outputs?: [String] }`. A `true` `path`/`allOutputs` flag and each
+/// entry of `outputs` contributes one StringContextElem to the result.
+///
+/// Requires forcing nested attrset values and list elements lazily, so it's
+/// structured as a state machine with the following stack layout:
+///
+///   [strVal, attrs, idx, acc]                         - outer loop
+///   [strVal, attrs, idx, acc, entryAttrs]             - after entry forced
+///   [strVal, attrs, idx, acc, list]                   - after `outputs` forced
+///   [strVal, attrs, idx, acc, list, oidx]             - output-element loop
+///   [strVal, attrs, idx, acc, list, oidx, outElem]    - after element forced
+///
+/// `acc` is a sentinel `NixString` whose `data` is empty and whose `context`
+/// is the accumulator. The string value itself is preserved in `strVal` and
+/// retrieved at finalization.
+///
+// TODO: handle thunk-valued `path` and `allOutputs` sub-attrs; currently they
+// must be already-evaluated booleans.
+pub fn append_context<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    let (str_val, attrs) = m.force_and_retry::<(StrictValue, Gc<AttrSet>)>(reader, mc)?;
+    // Type-check the string up front, before any context entry is forced.
+    if !str_val.is::<StringId>() && str_val.as_gc::<NixString>().is_none() {
+        return m.finish_type_err(NixType::String, str_val.ty());
+    }
+
+    let initial_ctx: StringContext = ctx.get_string_context(str_val).clone();
+    let acc = Gc::new(mc, NixString::with_context("", initial_ctx));
+
+    m.push(str_val.relax());
+    m.push(Value::new_gc(attrs));
+    m.push(Value::new_inline(0i32));
+    m.push(Value::new_gc(acc));
+
+    reader.set_pc(PrimOpPhase::AppendContextLoop.ip() as usize);
+    Step::Continue(())
+}
+
+pub fn append_context_loop<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    #[allow(clippy::unwrap_used)]
+    let idx = m.peek(1).as_inline::<i32>().unwrap();
+    #[allow(clippy::unwrap_used)]
+    let attrs = m.peek_forced(2).as_gc::<AttrSet>().unwrap();
+
+    if idx as usize >= attrs.entries.len() {
+        return append_context_finalize(m, ctx, reader, mc);
+    }
+
+    let entry_val = attrs.entries[idx as usize].1;
+    m.push(entry_val);
+    m.force_slot_to_pc(
+        0,
+        reader,
+        mc,
+        PrimOpPhase::AppendContextEntryForced.ip() as usize,
+    )?;
+    reader.set_pc(PrimOpPhase::AppendContextEntryForced.ip() as usize);
+    Step::Continue(())
+}
+
+pub fn append_context_entry_forced<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    // Stack: [strVal, attrs, idx, acc, entryAttrs(thunk)]
+    // The slot still holds the Thunk pointer; re-force to extract the now-
+    // Evaluated value into the slot.
+    m.force_slot(0, reader, mc)?;
+    let entry_val = m.peek_forced(0);
+    let Some(entry_attrs) = entry_val.as_gc::<AttrSet>() else {
+        return m.finish_type_err(NixType::AttrSet, entry_val.ty());
+    };
+
+    #[allow(clippy::unwrap_used)]
+    let idx = m.peek(2).as_inline::<i32>().unwrap();
+    #[allow(clippy::unwrap_used)]
+    let outer = m.peek_forced(3).as_gc::<AttrSet>().unwrap();
+    let path_key = outer.entries[idx as usize].0;
+    let path_str_owned: Box<str> = ctx.resolve_string(path_key).into();
+    if !path_str_owned.starts_with("/nix/store/") {
+        return m.finish_err(Error::eval_error(format!(
+            "context key '{path_str_owned}' is not a store path"
+        )));
+    }
+
+    // Eagerly handle `path` and `allOutputs` (assumed already-forced
+    // booleans - most callers either set them to literal `true` or omit
+    // them entirely).
+    // TODO: force these two attributes correctly
+    let path_id = ctx.intern_string("path");
+    let all_outputs_id = ctx.intern_string("allOutputs");
+    let outputs_id = ctx.intern_string("outputs");
+
+    #[allow(clippy::unwrap_used)]
+    let acc_gc = m.peek(1).as_gc::<NixString>().unwrap();
+    let mut new_acc = acc_gc.context().clone();
+
+    if let Some(v) = entry_attrs.lookup(path_id)
+        && v.as_inline::<bool>() == Some(true)
+    {
+        new_acc.insert(StringContextElem::Opaque {
+            path: path_str_owned.clone(),
+        });
+    }
+
+    if let Some(v) = entry_attrs.lookup(all_outputs_id)
+        && v.as_inline::<bool>() == Some(true)
+    {
+        if !path_str_owned.ends_with(".drv") {
+            return m.finish_err(Error::eval_error(format!(
+                "tried to add all-outputs context of {path_str_owned}, which is not a derivation, to a string"
+            )));
+        }
+        new_acc.insert(StringContextElem::DrvDeep {
+            drv_path: path_str_owned.clone(),
+        });
+    }
+
+    let new_acc_gc = Gc::new(mc, NixString::with_context("", new_acc));
+    m.replace(1, Value::new_gc(new_acc_gc));
+
+    if let Some(outputs_val) = entry_attrs.lookup(outputs_id) {
+        m.replace(0, outputs_val);
+        m.force_slot_to_pc(
+            0,
+            reader,
+            mc,
+            PrimOpPhase::AppendContextOutputsForced.ip() as usize,
+        )?;
+        reader.set_pc(PrimOpPhase::AppendContextOutputsForced.ip() as usize);
+        return Step::Continue(());
+    }
+
+    let _ = m.pop();
+    #[allow(clippy::unwrap_used)]
+    let idx_back = m.peek(1).as_inline::<i32>().unwrap();
+    m.replace(1, Value::new_inline(idx_back + 1));
+    reader.set_pc(PrimOpPhase::AppendContextLoop.ip() as usize);
+    Step::Continue(())
+}
+
+pub fn append_context_outputs_forced<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    _ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    m.force_slot(0, reader, mc)?;
+    let list_val = m.peek_forced(0);
+    let Some(list) = list_val.as_gc::<VmList>() else {
+        return m.finish_type_err(NixType::List, list_val.ty());
+    };
+    if list.inner.borrow().is_empty() {
+        // Stack: [strVal, attrs, idx, acc, list] -> drop list, bump idx.
+        let _ = m.pop();
+        #[allow(clippy::unwrap_used)]
+        let idx_back = m.peek(1).as_inline::<i32>().unwrap();
+        m.replace(1, Value::new_inline(idx_back + 1));
+        reader.set_pc(PrimOpPhase::AppendContextLoop.ip() as usize);
+        return Step::Continue(());
+    }
+
+    m.push(Value::new_inline(0i32));
+    reader.set_pc(PrimOpPhase::AppendContextOutputElementLoop.ip() as usize);
+    Step::Continue(())
+}
+
+pub fn append_context_output_element_loop<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    _ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    #[allow(clippy::unwrap_used)]
+    let oidx = m.peek(0).as_inline::<i32>().unwrap();
+    #[allow(clippy::unwrap_used)]
+    let list = m.peek_forced(1).as_gc::<VmList>().unwrap();
+    let len = list.inner.borrow().len();
+    if oidx as usize >= len {
+        // Stack: [strVal, attrs, idx, acc, list, oidx] -> drop oidx & list,
+        // bump idx in place.
+        let _ = m.pop();
+        let _ = m.pop();
+        #[allow(clippy::unwrap_used)]
+        let idx_back = m.peek(1).as_inline::<i32>().unwrap();
+        m.replace(1, Value::new_inline(idx_back + 1));
+        reader.set_pc(PrimOpPhase::AppendContextLoop.ip() as usize);
+        return Step::Continue(());
+    }
+
+    let elem = list.inner.borrow()[oidx as usize];
+    m.push(elem);
+    m.force_slot_to_pc(
+        0,
+        reader,
+        mc,
+        PrimOpPhase::AppendContextOutputElementForced.ip() as usize,
+    )?;
+    reader.set_pc(PrimOpPhase::AppendContextOutputElementForced.ip() as usize);
+    Step::Continue(())
+}
+
+pub fn append_context_output_element_forced<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    m.force_slot(0, reader, mc)?;
+    let elem = m.peek_forced(0);
+    let Some(output_name) = ctx.get_string(elem) else {
+        return m.finish_type_err(NixType::String, elem.ty());
+    };
+    let output_name: Box<str> = output_name.into();
+
+    #[allow(clippy::unwrap_used)]
+    let idx = m.peek(4).as_inline::<i32>().unwrap();
+    #[allow(clippy::unwrap_used)]
+    let outer = m.peek_forced(5).as_gc::<AttrSet>().unwrap();
+    let path_key = outer.entries[idx as usize].0;
+    let path_str: Box<str> = ctx.resolve_string(path_key).into();
+    if !path_str.ends_with(".drv") {
+        return m.finish_err(Error::eval_error(format!(
+            "tried to add derivation output context of {path_str}, which is not a derivation, to a string"
+        )));
+    }
+
+    #[allow(clippy::unwrap_used)]
+    let acc_gc = m.peek(3).as_gc::<NixString>().unwrap();
+    let mut new_acc = acc_gc.context().clone();
+    new_acc.insert(StringContextElem::Built {
+        drv_path: path_str,
+        output: output_name,
+    });
+    let new_acc_gc = Gc::new(mc, NixString::with_context("", new_acc));
+    m.replace(3, Value::new_gc(new_acc_gc));
+
+    // Stack: [strVal, attrs, idx, acc, list, oidx, outElem] -> drop outElem,
+    // bump oidx in place.
+    let _ = m.pop();
+    #[allow(clippy::unwrap_used)]
+    let oidx = m.peek(0).as_inline::<i32>().unwrap();
+    m.replace(0, Value::new_inline(oidx + 1));
+    reader.set_pc(PrimOpPhase::AppendContextOutputElementLoop.ip() as usize);
+    Step::Continue(())
+}
+
+fn append_context_finalize<'gc, M: Machine<'gc>>(
+    m: &mut M,
+    ctx: &mut impl VmRuntimeCtx,
+    reader: &mut BytecodeReader<'_>,
+    mc: &Mutation<'gc>,
+) -> Step {
+    // Stack: [strVal, attrs, idx, acc]
+    #[allow(clippy::unwrap_used)]
+    let acc_gc = m.pop().as_gc::<NixString>().unwrap();
+    let _ = m.pop(); // idx
+    let _ = m.pop(); // attrs
+    let str_val_raw = m.pop();
+
+    // The strVal was already forced at entry; restrict() is infallible here.
+    let str_val = str_val_raw
+        .restrict()
+        .unwrap_or_else(|_| panic!("appendContext: strVal unexpectedly a thunk"));
+
+    // `append_context` type-checks `strVal` before pushing it, so a failure
+    // here means that invariant broke; surface it instead of returning "".
+    let Some(s_str) = ctx.get_string(str_val) else {
+        return m.finish_type_err(NixType::String, str_val.ty());
+    };
+    let s_str = s_str.to_owned();
+    let context = acc_gc.context().clone();
+    let result = if context.is_empty() {
+        let sid = ctx.intern_string(s_str);
+        Value::new_inline(sid)
+    } else {
+        let ns = Gc::new(mc, NixString::with_context(s_str, context));
+        Value::new_gc(ns)
+    };
+    m.return_from_primop(result, reader)
+}
diff --git a/fix-primops/src/lib.rs b/fix-primops/src/lib.rs
index 408b677..e0dbe92 100644
--- a/fix-primops/src/lib.rs
+++ b/fix-primops/src/lib.rs
@@ -1,3 +1,4 @@
+mod context;
 mod control;
 mod conv;
 mod eq;
@@ -5,6 +6,7 @@ mod io;
 mod list;
 mod path;
 
+pub use context::*;
 pub use control::*;
 pub use conv::*;
 pub use eq::*;
@@ -69,6 +71,19 @@ pub fn dispatch_primop<'gc, M: Machine<'gc>>(
         ToString => to_string(m, ctx, reader, mc),
         TypeOf => type_of(m, ctx, reader, mc),
 
+        HasContext => has_context(m, ctx, reader, mc),
+        GetContext => get_context(m, ctx, reader, mc),
+        AppendContext => append_context(m, ctx, reader, mc),
+        AppendContextLoop => append_context_loop(m, ctx, reader, mc),
+        AppendContextEntryForced => append_context_entry_forced(m, ctx, reader, mc),
+        AppendContextOutputsForced => append_context_outputs_forced(m, ctx, reader, mc),
+        AppendContextOutputElementLoop => append_context_output_element_loop(m, ctx, reader, mc),
+        AppendContextOutputElementForced => {
+            append_context_output_element_forced(m, ctx, reader, mc)
+        }
+        UnsafeDiscardStringContext => unsafe_discard_string_context(m, ctx, reader, mc),
+        UnsafeDiscardOutputDependency => unsafe_discard_output_dependency(m, ctx, reader, mc),
+
         phase => todo!("primop phase {phase:?}"),
     }
 }
diff --git a/fix-vm/src/instructions/arithmetic.rs b/fix-vm/src/instructions/arithmetic.rs
index 8db1466..968a10d 100644
--- a/fix-vm/src/instructions/arithmetic.rs
+++ b/fix-vm/src/instructions/arithmetic.rs
@@ -33,7 +33,13 @@ impl<'gc> crate::Vm<'gc> {
             return Step::Continue(());
         }
         if let (Some(ls), Some(rs)) = (ctx.get_string(lhs), ctx.get_string_or_path(rhs)) {
-            let ns = Gc::new(mc, crate::NixString::new(format!("{ls}{rs}")));
+            let merged = ctx
+                .get_string_context(lhs)
+                .merge(ctx.get_string_context(rhs));
+            let ns = Gc::new(
+                mc,
+                crate::NixString::with_context(format!("{ls}{rs}"), merged),
+            );
             self.push(Value::new_gc(ns));
             return Step::Continue(());
         }
diff --git a/fix-vm/src/instructions/misc.rs b/fix-vm/src/instructions/misc.rs
index 4745e74..69cb47d 100644
--- a/fix-vm/src/instructions/misc.rs
+++ b/fix-vm/src/instructions/misc.rs
@@ -1,6 +1,6 @@
 use std::path::PathBuf;
 
-use fix_abstract_vm::{AttrSet, NixString, Path, StrictValue, canon_path_str};
+use fix_abstract_vm::{AttrSet, NixString, Path, StrictValue, StringContext, canon_path_str};
 use fix_builtins::BuiltinId;
 use fix_common::StringId;
 use fix_error::Error;
@@ -89,29 +89,45 @@ impl<'gc> crate::Vm<'gc> {
         &mut self,
         ctx: &mut impl VmRuntimeCtx,
         reader: &mut BytecodeReader<'_>,
-        _mc: &gc_arena::Mutation<'gc>,
+        mc: &gc_arena::Mutation<'gc>,
     ) -> Step {
         let count = reader.read_u16() as usize;
         let _force_string = reader.read_u8() != 0;
 
         let mut total_len = 0;
+        let mut has_any_context = false;
         for i in 0..count {
             let val = self.peek_forced(count - 1 - i);
             let s = ctx.get_string(val).expect("coerced");
             total_len += s.len();
+            if !ctx.get_string_context(val).is_empty() {
+                has_any_context = true;
+            }
         }
 
         let mut result = String::with_capacity(total_len);
+        let mut merged = StringContext::new();
         for i in 0..count {
             let val = self.peek_forced(count - 1 - i);
             let s = ctx.get_string(val).expect("coerced");
             result.push_str(s);
+            if has_any_context {
+                let val_ctx = ctx.get_string_context(val);
+                if !val_ctx.is_empty() {
+                    merged = merged.merge(val_ctx);
+                }
+            }
         }
 
         self.stack.truncate(self.stack.len() - count);
 
-        let sid = ctx.intern_string(result);
-        self.push(Value::new_inline(sid));
+        if merged.is_empty() {
+            let sid = ctx.intern_string(result);
+            self.push(Value::new_inline(sid));
+        } else {
+            let ns = gc_arena::Gc::new(mc, NixString::with_context(result, merged));
+            self.push(Value::new_gc(ns));
+        }
         Step::Continue(())
     }
 
diff --git a/fix/src/string_context.rs b/fix/src/string_context.rs
deleted file mode 100644
index d353152..0000000
--- a/fix/src/string_context.rs
+++ /dev/null
@@ -1,209 +0,0 @@
-use std::collections::{BTreeMap, BTreeSet, VecDeque};
-
-pub enum StringContextElem {
-    Opaque { path: String },
-    DrvDeep { drv_path: String },
-    Built { drv_path: String, output: String },
-}
-
-impl StringContextElem {
-    pub fn decode(encoded: &str) -> Self {
-        if let Some(drv_path) = encoded.strip_prefix('=') {
-            StringContextElem::DrvDeep {
-                drv_path: drv_path.to_string(),
-            }
-        } else if let Some(rest) = encoded.strip_prefix('!') {
-            if let Some(second_bang) = rest.find('!') {
-                let output = rest[..second_bang].to_string();
-                let drv_path = rest[second_bang + 1..].to_string();
-                StringContextElem::Built { drv_path, output }
-            } else {
-                StringContextElem::Opaque {
-                    path: encoded.to_string(),
-                }
-            }
-        } else {
-            StringContextElem::Opaque {
-                path: encoded.to_string(),
-            }
-        }
-    }
-}
-
-pub type InputDrvs = BTreeMap<String, BTreeSet<String>>;
-pub type Srcs = BTreeSet<String>;
-pub fn extract_input_drvs_and_srcs(context: &[String]) -> Result<(InputDrvs, Srcs), String> {
-    let mut input_drvs: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
-    let mut input_srcs: BTreeSet<String> = BTreeSet::new();
-
-    for encoded in context {
-        match StringContextElem::decode(encoded) {
-            StringContextElem::Opaque { path } => {
-                input_srcs.insert(path);
-            }
-            StringContextElem::DrvDeep { drv_path } => {
-                compute_fs_closure(&drv_path, &mut input_drvs, &mut input_srcs)?;
-            }
-            StringContextElem::Built { drv_path, output } => {
-                input_drvs.entry(drv_path).or_default().insert(output);
-            }
-        }
-    }
-
-    Ok((input_drvs, input_srcs))
-}
-
-fn compute_fs_closure(
-    drv_path: &str,
-    input_drvs: &mut BTreeMap<String, BTreeSet<String>>,
-    input_srcs: &mut BTreeSet<String>,
-) -> Result<(), String> {
-    let mut queue: VecDeque<String> = VecDeque::new();
-    let mut visited: BTreeSet<String> = BTreeSet::new();
-
-    queue.push_back(drv_path.to_string());
-
-    while let Some(current_path) = queue.pop_front() {
-        if visited.contains(&current_path) {
-            continue;
-        }
-        visited.insert(current_path.clone());
-        input_srcs.insert(current_path.clone());
-
-        if !current_path.ends_with(".drv") {
-            continue;
-        }
-
-        let content = std::fs::read_to_string(&current_path)
-            .map_err(|e| format!("failed to read derivation {}: {}", current_path, e))?;
-
-        let inputs = parse_derivation_inputs(&content)
-            .ok_or_else(|| format!("failed to parse derivation {}", current_path))?;
-
-        for src in inputs.input_srcs {
-            input_srcs.insert(src.clone());
-            if !visited.contains(&src) {
-                queue.push_back(src);
-            }
-        }
-
-        for (dep_drv, outputs) in inputs.input_drvs {
-            input_srcs.insert(dep_drv.clone());
-            let entry = input_drvs.entry(dep_drv.clone()).or_default();
-            for output in outputs {
-                entry.insert(output);
-            }
-            if !visited.contains(&dep_drv) {
-                queue.push_back(dep_drv);
-            }
-        }
-    }
-
-    Ok(())
-}
-
-struct DerivationInputs {
-    input_drvs: Vec<(String, Vec<String>)>,
-    input_srcs: Vec<String>,
-}
-
-fn parse_derivation_inputs(aterm: &str) -> Option<DerivationInputs> {
-    let aterm = aterm.strip_prefix("Derive([")?;
-
-    let mut bracket_count: i32 = 1;
-    let mut pos = 0;
-    let bytes = aterm.as_bytes();
-    while pos < bytes.len() && bracket_count > 0 {
-        match bytes[pos] {
-            b'[' => bracket_count += 1,
-            b']' => bracket_count -= 1,
-            _ => {}
-        }
-        pos += 1;
-    }
-    if bracket_count != 0 {
-        return None;
-    }
-
-    let rest = &aterm[pos..];
-    let rest = rest.strip_prefix(",[")?;
-
-    let mut input_drvs = Vec::new();
-    let mut bracket_count: i32 = 1;
-    let mut start = 0;
-    pos = 0;
-    let bytes = rest.as_bytes();
-
-    while pos < bytes.len() && bracket_count > 0 {
-        match bytes[pos] {
-            b'[' => bracket_count += 1,
-            b']' => bracket_count -= 1,
-            b'(' if bracket_count == 1 => {
-                start = pos;
-            }
-            b')' if bracket_count == 1 => {
-                let entry = &rest[start + 1..pos];
-                if let Some((drv_path, outputs)) = parse_input_drv_entry(entry) {
-                    input_drvs.push((drv_path, outputs));
-                }
-            }
-            _ => {}
-        }
-        pos += 1;
-    }
-
-    let rest = &rest[pos..];
-    let rest = rest.strip_prefix(",[")?;
-
-    let mut input_srcs = Vec::new();
-    bracket_count = 1;
-    pos = 0;
-    let bytes = rest.as_bytes();
-
-    while pos < bytes.len() && bracket_count > 0 {
-        match bytes[pos] {
-            b'[' => bracket_count += 1,
-            b']' => bracket_count -= 1,
-            b'"' if bracket_count == 1 => {
-                pos += 1;
-                let src_start = pos;
-                while pos < bytes.len() && bytes[pos] != b'"' {
-                    if bytes[pos] == b'\\' && pos + 1 < bytes.len() {
-                        pos += 2;
-                    } else {
-                        pos += 1;
-                    }
-                }
-                let src = std::str::from_utf8(&bytes[src_start..pos]).ok()?;
-                input_srcs.push(src.to_string());
-            }
-            _ => {}
-        }
-        pos += 1;
-    }
-
-    Some(DerivationInputs {
-        input_drvs,
-        input_srcs,
-    })
-}
-
-fn parse_input_drv_entry(entry: &str) -> Option<(String, Vec<String>)> {
-    let entry = entry.strip_prefix('"')?;
-    let quote_end = entry.find('"')?;
-    let drv_path = entry[..quote_end].to_string();
-
-    let rest = &entry[quote_end + 1..];
-    let rest = rest.strip_prefix(",[")?;
-    let rest = rest.strip_suffix(']')?;
-
-    let mut outputs = Vec::new();
-    for part in rest.split(',') {
-        let part = part.trim();
-        if let Some(name) = part.strip_prefix('"').and_then(|s| s.strip_suffix('"')) {
-            outputs.push(name.to_string());
-        }
-    }
-
-    Some((drv_path, outputs))
-}