feat: init Rust VM

This commit is contained in:
2026-03-14 11:42:39 +08:00
parent 40d00a6c47
commit 198d847151
26 changed files with 3620 additions and 993 deletions
+292 -167
View File
@@ -1,68 +1,121 @@
use std::fmt;
use std::marker::PhantomData;
use std::mem::size_of;
use std::ops::Deref;
use boxing::nan::raw::{RawBox, RawStore, RawTag, Value as RawValue};
use gc_arena::{Collect, Gc};
use hashbrown::HashTable;
use gc_arena::{Collect, Gc, Mutation, RefLock, collect::Trace};
use sealed::sealed;
use smallvec::SmallVec;
use string_interner::{Symbol, symbol::SymbolU32};
trait Storable {
use crate::ir::StringId;
#[sealed]
pub(crate) trait Storable {
const TAG: (bool, u8);
}
trait InlineStorable: Storable + RawStore {}
trait GcStorable: Storable {}
pub(crate) trait InlineStorable: Storable + RawStore {}
pub(crate) trait GcStorable: Storable {}
macro_rules! inline_types {
($($type:ty => $id:expr),*$(,)?) => {
macro_rules! define_value_types {
(
inline { $($itype:ty => $itag:expr, $iname:literal;)* }
gc { $($gtype:ty => $gtag:expr, $gname:literal;)* }
) => {
$(
impl Storable for $type {
const TAG: (bool, u8) = (false, $id);
#[sealed]
impl Storable for $itype {
const TAG: (bool, u8) = $itag;
}
impl InlineStorable for $type {}
impl InlineStorable for $itype {}
)*
};
}
macro_rules! gc_types {
($($type:ty => $id:expr),*$(,)?) => {
$(
impl Storable for $type {
const TAG: (bool, u8) = (true, $id);
#[sealed]
impl Storable for $gtype {
const TAG: (bool, u8) = $gtag;
}
impl GcStorable for $type {}
impl GcStorable for $gtype {}
)*
const _: () = assert!(size_of::<Value<'static>>() == 8);
$(const _: () = assert!(size_of::<$itype>() <= 6);)*
$(const _: () = { let (_, val) = $itag; assert!(val >= 1 && val <= 7); };)*
$(const _: () = { let (_, val) = $gtag; assert!(val >= 1 && val <= 7); };)*
const _: () = {
let tags: &[(bool, u8)] = &[$($itag),*, $($gtag),*];
let mut mask_false: u8 = 0;
let mut mask_true: u8 = 0;
let mut i = 0;
while i < tags.len() {
let (neg, val) = tags[i];
let bit = 1 << val;
if neg {
assert!(mask_true & bit == 0, "duplicate true tag id");
mask_true |= bit;
} else {
assert!(mask_false & bit == 0, "duplicate false tag id");
mask_false |= bit;
}
i += 1;
}
};
unsafe impl<'gc> Collect<'gc> for Value<'gc> {
const NEEDS_TRACE: bool = true;
fn trace<T: Trace<'gc>>(&self, cc: &mut T) {
let Some(tag) = self.raw.tag() else { return };
match tag.neg_val() {
$(<$gtype as Storable>::TAG => unsafe {
self.load_gc::<$gtype>().trace(cc)
},)*
$(<$itype as Storable>::TAG => (),)*
(neg, val) => unreachable!("invalid tag: neg={neg}, val={val}"),
}
}
}
impl fmt::Debug for Value<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.tag() {
None => write!(f, "Float({:?})", unsafe {
self.raw.float().unwrap_unchecked()
}),
$(Some(<$itype as Storable>::TAG) => write!(f, "{}({:?})", $iname, unsafe {
self.as_inline::<$itype>().unwrap_unchecked()
}),)*
$(Some(<$gtype as Storable>::TAG) =>
write!(f, "{}({:?})", $gname, unsafe { self.as_gc::<$gtype>().unwrap_unchecked() }),)*
Some((neg, val)) => write!(f, "Unknown(neg={neg}, val={val})"),
}
}
}
};
}
inline_types! {
i32 => 1,
bool => 2,
Null => 3,
SmallStringId => 4,
}
gc_types! {
i64 => 1,
NixString => 2,
SmallAttrSet<'_> => 3,
AttrSet<'_> => 4,
Box<[Value<'_>]> => 5,
define_value_types! {
inline {
i32 => (false, 1), "SmallInt";
bool => (false, 2), "Bool";
Null => (false, 3), "Null";
StringId => (false, 4), "SmallString";
PrimOp => (false, 5), "PrimOp";
}
gc {
i64 => (false, 6), "BigInt";
NixString => (false, 7), "String";
AttrSet<'_> => (true, 1), "AttrSet";
List<'_> => (true, 2), "List";
Thunk<'_> => (true, 3), "Thunk";
Closure<'_> => (true, 4), "Closure";
PrimOpApp<'_> => (true, 5), "PrimOpApp";
}
}
/// # Nix runtime value representation
///
/// NaN-boxed value fitting in 8 bytes. Morally equivalent to:
/// ```ignore
/// enum Value<'gc> {
/// Float(SingleNaNF64),
/// SmallInt(i32),
/// BigInt(Gc<'gc, i64>),
/// Bool(bool),
/// Null,
/// SmallString(SmallStringId),
/// String(Gc<'gc, NixString>),
/// SmallAttrSet(Gc<'gc, SmallAttrSet<'gc>>),
/// AttrSet(Gc<'gc, AttrSet<'gc>>),
/// List(Gc<'gc, Box<[Value<'gc>]>>),
/// }
/// ```
/// NaN-boxed value fitting in 8 bytes.
#[repr(transparent)]
pub(crate) struct Value<'gc> {
raw: RawBox,
@@ -79,90 +132,16 @@ impl Clone for Value<'_> {
}
}
macro_rules! trace_impl {
($self:expr, $cc:expr, $tag:expr; $($type:ty),*$(,)?) => {
match $tag.neg_val() {
// Positive tags are inline data - skip it.
(false, _) => (),
// Negative tags are Gc pointers - reconstruct and trace it.
$(
<$type as Storable>::TAG => unsafe { $self.load_gc::<$type>().trace($cc) },
)*
(true, val) => unreachable!("invalid negative tag value: {val}"),
}
};
}
unsafe impl<'gc> Collect for Value<'gc> {
fn trace(&self, cc: &gc_arena::Collection) {
// No tag - raw float
let Some(tag) = self.raw.tag() else { return };
trace_impl! {
self, cc, tag;
i64,
NixString,
SmallAttrSet<'gc>,
AttrSet<'gc>,
Box<[Value<'gc>]>,
}
}
fn needs_trace() -> bool
where
Self: Sized,
{
true
}
}
macro_rules! debug_impl {
($self:expr; $($type:ty),*$(,)?) => {
match $self.tag() {
None => write!(f, "Float({:?})", unsafe {
self.raw.float().unwrap_unchecked()
}),
$(
Some()
)*
}
};
}
impl fmt::Debug for Value<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.tag() {
None => write!(f, "Float({:?})", unsafe {
self.raw.float().unwrap_unchecked()
}),
Some(i32::TAG) => write!(f, "SmallInt({:?})", unsafe {
self.as_inline::<i32>().unwrap_unchecked()
}),
Some(bool::TAG) => write!(f, "Bool({:?})", unsafe {
self.as_inline::<bool>().unwrap_unchecked()
}),
Some(Null::TAG) => write!(f, "Null"),
Some(SmallStringId::TAG) => {
write!(f, "SmallString({:?})", unsafe {
self.as_inline::<SmallStringId>().unwrap_unchecked().0
})
}
Some(i64::TAG) => write!(f, "BigInt(Gc<..>)"),
Some(NixString::TAG) => write!(f, "String(Gc<..>)"),
Some(SmallAttrSet::TAG) => write!(f, "SmallAttrSet(Gc<..>)"),
Some(AttrSet::TAG) => write!(f, "AttrSet(Gc<..>)"),
Some(<Box<[Value<'_>]> as Storable>::TAG) => write!(f, "List(Gc<..>)"),
Some((neg, val)) => write!(f, "Unknown(neg={neg}, val={val})"),
}
impl Default for Value<'_> {
#[inline(always)]
fn default() -> Self {
Self::new_inline(Null)
}
}
impl<'gc> Value<'gc> {
/// Builds the `RawTag` for the tag pair `(neg, val)`.
///
/// `val` must lie in `1..=7`. The range is checked only in debug builds; in release
/// builds an out-of-range `val` reaches `RawTag::new_unchecked` unchecked.
#[inline(always)]
fn mk_tag(neg: bool, val: u8) -> RawTag {
    debug_assert!((1..=7).contains(&val));
    // SAFETY: `val` is asserted above to be in 1..=7 (debug builds); callers are
    // responsible for upholding that range in release builds.
    unsafe { RawTag::new_unchecked(neg, val) }
}
@@ -175,20 +154,13 @@ impl<'gc> Value<'gc> {
}
}
/// Store a GC pointer with the given (negative) tag value.
#[inline(always)]
fn store_gc<T>(tag_val: u8, gc: Gc<'gc, T>) -> Self {
let ptr = Gc::as_ptr(gc);
Self::from_raw_value(RawValue::store(Self::mk_tag(true, tag_val), ptr))
}
/// Load a GC pointer from a value with a negative tag.
///
/// # Safety
///
/// The value must actually store a `Gc<'gc, T>` with the matching type.
#[inline(always)]
unsafe fn load_gc<T>(&self) -> Gc<'gc, T> {
unsafe fn load_gc<T: GcStorable>(&self) -> Gc<'gc, T> {
unsafe {
let rv = self.raw.value().unwrap_unchecked();
let ptr: *const T = <*const T as RawStore>::from_val(rv);
@@ -214,13 +186,13 @@ impl<'gc> Value<'gc> {
#[inline]
pub(crate) fn new_inline<T: InlineStorable>(val: T) -> Self {
Self::from_raw_value(RawValue::store(Self::mk_tag(false, T::TAG.1), val))
Self::from_raw_value(RawValue::store(Self::mk_tag(T::TAG.0, T::TAG.1), val))
}
#[inline]
pub(crate) fn new_gc<T: GcStorable>(gc: Gc<'gc, T>) -> Self {
let ptr = Gc::as_ptr(gc);
Self::from_raw_value(RawValue::store(Self::mk_tag(true, T::TAG.1), ptr))
Self::from_raw_value(RawValue::store(Self::mk_tag(T::TAG.0, T::TAG.1), ptr))
}
}
@@ -268,6 +240,7 @@ impl<'gc> Value<'gc> {
}
}
/// Unit marker type for the null value; stored inline in a `Value` and used as
/// `Value::default()`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Null;
impl RawStore for Null {
fn to_val(self, value: &mut RawValue) {
@@ -278,17 +251,15 @@ impl RawStore for Null {
}
}
// TODO: size?
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Eq, Debug, Collect)]
#[collect(require_static)]
pub(crate) struct SmallStringId(u32);
impl RawStore for SmallStringId {
impl RawStore for StringId {
fn to_val(self, value: &mut RawValue) {
self.0.to_val(value);
(self.0.to_usize() as u32).to_val(value);
}
fn from_val(value: &RawValue) -> Self {
Self(u32::from_val(value))
Self(
SymbolU32::try_from_usize(u32::from_val(value) as usize)
.expect("failed to read StringId from Value"),
)
}
}
@@ -320,45 +291,199 @@ impl fmt::Debug for NixString {
}
}
/// Fixed-size attribute set (up to 8 entries).
#[derive(Collect)]
#[derive(Collect, Debug)]
#[collect(no_drop)]
pub(crate) struct SmallAttrSet<'gc> {
// TODO: proper key storage, length tracking, and lookup
inner: [Value<'gc>; 8],
}
/// Hash-table-backed attribute set.
pub(crate) struct AttrSet<'gc> {
inner: HashTable<AttrSetEntry<'gc>>,
pub(crate) entries: SmallVec<[(StringId, Value<'gc>); 4]>,
}
unsafe impl<'gc> Collect for AttrSet<'gc> {
fn trace(&self, cc: &gc_arena::Collection) {
for entry in self.inner.iter() {
Collect::trace(&entry.key, cc);
Collect::trace(&entry.value, cc);
impl<'gc> AttrSet<'gc> {
    /// Wraps an entry list that is already sorted by key, without re-sorting it.
    ///
    /// The ordering is verified only in debug builds. `lookup`, `has` and `merge`
    /// all depend on it.
    pub(crate) fn from_sorted(entries: SmallVec<[(StringId, Value<'gc>); 4]>) -> Self {
        debug_assert!(entries.is_sorted_by_key(|(key, _)| *key));
        Self { entries }
    }

    /// Returns a clone of the value stored under `key`, or `None` when the key is absent.
    ///
    /// Binary search over the sorted entries.
    pub(crate) fn lookup(&self, key: StringId) -> Option<Value<'gc>> {
        self.entries
            .binary_search_by_key(&key, |(k, _)| *k)
            .ok()
            .map(|i| self.entries[i].1.clone())
    }

    /// Returns `true` when an entry with `key` exists.
    ///
    /// Binary search over the sorted entries.
    pub(crate) fn has(&self, key: StringId) -> bool {
        self.entries
            .binary_search_by_key(&key, |(k, _)| *k)
            .is_ok()
    }

    /// Merges two sorted attribute sets into a freshly allocated one.
    ///
    /// When both sets contain the same key, the entry from `other` wins. Neither input
    /// is modified; the result is allocated through `mc`.
    pub(crate) fn merge(&self, other: &Self, mc: &Mutation<'gc>) -> Gc<'gc, Self> {
        use std::cmp::Ordering::*;

        debug_assert!(self.entries.is_sorted_by_key(|(key, _)| *key));
        debug_assert!(other.entries.is_sorted_by_key(|(key, _)| *key));

        // Every entry of `other` survives the merge, and an entry of `self` is dropped
        // only when it is paired with an equal key of `other`. The result therefore has
        // at least as many entries as the larger input, so reserving that much up front
        // never over-allocates and avoids repeated regrowth for large sets.
        let mut entries = SmallVec::with_capacity(self.entries.len().max(other.entries.len()));

        let mut i = 0;
        let mut j = 0;
        while i < self.entries.len() && j < other.entries.len() {
            match self.entries[i].0.cmp(&other.entries[j].0) {
                Less => {
                    entries.push(self.entries[i].clone());
                    i += 1;
                }
                Greater => {
                    entries.push(other.entries[j].clone());
                    j += 1;
                }
                Equal => {
                    // Same key on both sides: keep `other`'s value and skip `self`'s.
                    entries.push(other.entries[j].clone());
                    i += 1;
                    j += 1;
                }
            }
        }

        // The loop ends once one side is exhausted, so at most one tail is non-empty.
        entries.extend(other.entries[j..].iter().cloned());
        entries.extend(self.entries[i..].iter().cloned());

        debug_assert!(entries.is_sorted_by_key(|(key, _)| *key));

        Gc::new(mc, AttrSet { entries })
    }
}
/// List payload stored behind a GC pointer in a `Value`.
#[derive(Collect, Debug)]
#[collect(no_drop)]
pub(crate) struct List<'gc> {
    /// The list elements; the `SmallVec` keeps up to four of them inline.
    pub(crate) inner: SmallVec<[Value<'gc>; 4]>,
}
/// A thunk: a `ThunkState` wrapped in gc-arena's `RefLock` cell.
pub(crate) type Thunk<'gc> = RefLock<ThunkState<'gc>>;

/// The state held by a thunk.
#[derive(Collect, Debug)]
#[collect(no_drop)]
pub(crate) enum ThunkState<'gc> {
    /// NOTE(review): presumably a not-yet-evaluated thunk, with `ip` the code offset of
    /// its body and `env` the environment it was created in — confirm against the VM.
    Pending {
        ip: u32,
        env: Gc<'gc, RefLock<Env<'gc>>>,
    },
    /// NOTE(review): presumably marks a thunk that is currently being evaluated, so that
    /// re-entrant forcing can be detected — confirm against the VM.
    Blackhole,
    /// Carries a `Value`; presumably the result of evaluating the thunk.
    Evaluated(Value<'gc>),
}
/// One environment frame: a set of local slots plus an optional link to a previous frame.
#[derive(Collect, Debug)]
#[collect(no_drop)]
pub(crate) struct Env<'gc> {
    /// Local slots. `Env::with_arg` puts the argument in slot 0 and fills the remaining
    /// `n_locals` slots with `Value::default()`.
    pub(crate) locals: SmallVec<[Value<'gc>; 4]>,
    /// Link to the previous frame; `None` for a frame built by `Env::empty`.
    pub(crate) prev: Option<Gc<'gc, RefLock<Env<'gc>>>>,
}
impl<'gc> Env<'gc> {
pub(crate) fn empty() -> Self {
Env {
locals: SmallVec::new(),
prev: None,
}
}
fn needs_trace() -> bool
where
Self: Sized,
{
true
pub(crate) fn with_arg(
arg: Value<'gc>,
n_locals: u32,
prev: Gc<'gc, RefLock<Env<'gc>>>,
) -> Self {
let mut locals = smallvec::smallvec![Value::default(); 1 + n_locals as usize];
locals[0] = arg;
Env {
locals,
prev: Some(prev),
}
}
}
#[derive(Collect)]
#[derive(Collect, Debug)]
#[collect(no_drop)]
struct AttrSetEntry<'gc> {
key: AttrKey<'gc>,
value: Value<'gc>,
pub(crate) struct Closure<'gc> {
    /// NOTE(review): presumably the code offset of the function body — confirm.
    pub(crate) ip: u32,
    /// NOTE(review): presumably the number of local slots besides the argument
    /// (cf. the `n_locals` parameter of `Env::with_arg`) — confirm.
    pub(crate) n_locals: u32,
    /// Environment held by the closure; presumably the one it was created in.
    pub(crate) env: Gc<'gc, RefLock<Env<'gc>>>,
    /// Optional pattern description. NOTE(review): `None` presumably means the
    /// parameter is a plain identifier rather than an attribute pattern — confirm.
    pub(crate) pattern: Option<Gc<'gc, PatternInfo>>,
}
#[derive(Collect)]
#[derive(Collect, Debug)]
#[collect(require_static)]
/// Static description of a parameter pattern, referenced from `Closure::pattern`.
pub(crate) struct PatternInfo {
    /// NOTE(review): presumably the attribute names the argument must provide — confirm.
    pub(crate) required: SmallVec<[StringId; 4]>,
    /// NOTE(review): presumably the attribute names that may be omitted — confirm.
    pub(crate) optional: SmallVec<[StringId; 4]>,
    /// NOTE(review): presumably whether the pattern accepts extra attributes (`...`) — confirm.
    pub(crate) ellipsis: bool,
    /// Pairs of a name and a `u32`. NOTE(review): the `u32` is presumably a source-span
    /// identifier for that parameter — confirm.
    pub(crate) param_spans: Box<[(StringId, u32)]>,
}
/// Inline descriptor of a primitive operation; its `RawStore` impl packs it into two
/// payload bytes of a `Value`.
#[derive(Clone, Copy, Debug, Collect)]
#[collect(require_static)]
pub(crate) struct PrimOp {
    /// Identifier of the primop. NOTE(review): presumably an index into a table of
    /// builtins — confirm.
    pub(crate) id: u8,
    /// NOTE(review): presumably the number of arguments the primop expects — confirm.
    pub(crate) arity: u8,
}
impl RawStore for PrimOp {
    /// Writes the primop into the payload: four zero bytes, then `id`, then `arity`.
    fn to_val(self, value: &mut RawValue) {
        let Self { id, arity } = self;
        let payload = [0, 0, 0, 0, id, arity];
        value.set_data(payload);
    }

    /// Reads `id` and `arity` back from the last two payload bytes.
    fn from_val(value: &RawValue) -> Self {
        let &[.., id, arity] = value.data();
        Self { id, arity }
    }
}
#[derive(Collect, Debug)]
#[collect(no_drop)]
pub(crate) enum AttrKey<'gc> {
Small(SmallStringId),
Large(Gc<'gc, str>),
pub(crate) struct PrimOpApp<'gc> {
    /// The primop this application refers to.
    pub(crate) primop: PrimOp,
    /// NOTE(review): presumably the arguments supplied so far for a partially applied
    /// primop — confirm. The `SmallVec` keeps up to two of them inline.
    pub(crate) args: SmallVec<[Value<'gc>; 2]>,
}
/// Wrapper around a `Value` with a private field; the constructor `try_from_forced`
/// only accepts values that are not thunks.
#[repr(transparent)]
pub(crate) struct StrictValue<'gc>(Value<'gc>);
impl<'gc> StrictValue<'gc> {
#[inline]
pub(crate) fn try_from_forced(val: Value<'gc>) -> Option<Self> {
if !val.is::<Thunk<'gc>>() {
Some(Self(val))
} else {
None
}
}
#[inline]
pub(crate) fn into_relaxed(self) -> Value<'gc> {
self.0
}
}
impl<'gc> Deref for StrictValue<'gc> {
type Target = Value<'gc>;
#[inline]
fn deref(&self) -> &Value<'gc> {
&self.0
}
}
impl Clone for StrictValue<'_> {
#[inline]
fn clone(&self) -> Self {
Self(self.0.clone())
}
}
impl fmt::Debug for StrictValue<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.0, f)
}
}
unsafe impl<'gc> Collect<'gc> for StrictValue<'gc> {
const NEEDS_TRACE: bool = true;
fn trace<T: gc_arena::collect::Trace<'gc>>(&self, cc: &mut T) {
self.0.trace(cc);
}
}