593 lines
17 KiB
Rust
593 lines
17 KiB
Rust
use std::cell::UnsafeCell;
|
|
use std::hash::BuildHasher;
|
|
use std::path::Path;
|
|
|
|
use bumpalo::Bump;
|
|
use ghost_cell::{GhostCell, GhostToken};
|
|
use hashbrown::{DefaultHashBuilder, HashMap, HashSet, HashTable};
|
|
use rnix::TextRange;
|
|
use string_interner::DefaultStringInterner;
|
|
|
|
use crate::bytecode::{self, Bytecode, BytecodeContext, Constant};
|
|
use crate::disassembler::{Disassembler, DisassemblerContext};
|
|
use crate::downgrade::*;
|
|
use crate::error::{Error, Result, Source};
|
|
use crate::ir::{ArgId, Ir, IrKey, IrRef, RawIrRef, SymId, ThunkId, ir_content_eq};
|
|
use crate::store::{DaemonStore, Store, StoreConfig};
|
|
use crate::value::{Symbol, Value};
|
|
|
|
fn parse_error_span(error: &rnix::ParseError) -> Option<rnix::TextRange> {
|
|
use rnix::ParseError::*;
|
|
match error {
|
|
Unexpected(range)
|
|
| UnexpectedExtra(range)
|
|
| UnexpectedWanted(_, range, _)
|
|
| UnexpectedDoubleBind(range)
|
|
| DuplicatedArgs(range, _) => Some(*range),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn handle_parse_error<'a>(
|
|
errors: impl IntoIterator<Item = &'a rnix::ParseError>,
|
|
source: Source,
|
|
) -> Option<Box<Error>> {
|
|
for err in errors {
|
|
if let Some(span) = parse_error_span(err) {
|
|
return Some(
|
|
Error::parse_error(err.to_string())
|
|
.with_source(source)
|
|
.with_span(span),
|
|
);
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
impl Context {
|
|
pub fn eval(&mut self, _source: Source) -> Result<Value> {
|
|
todo!()
|
|
}
|
|
pub fn eval_shallow(&mut self, _source: Source) -> Result<Value> {
|
|
todo!()
|
|
}
|
|
pub fn eval_deep(&mut self, _source: Source) -> Result<Value> {
|
|
todo!()
|
|
}
|
|
|
|
pub fn eval_repl<'a>(&'a mut self, _source: Source, _scope: &'a HashSet<SymId>) -> Result<Value> {
|
|
todo!()
|
|
}
|
|
|
|
pub fn disassemble(&self, bytecode: &Bytecode) -> String {
|
|
Disassembler::new(bytecode, self).disassemble()
|
|
}
|
|
|
|
pub fn disassemble_colored(&self, bytecode: &Bytecode) -> String {
|
|
Disassembler::new(bytecode, self).disassemble_colored()
|
|
}
|
|
|
|
pub fn add_binding<'a>(
|
|
&'a mut self,
|
|
name: &str,
|
|
expr: &str,
|
|
scope: &'a mut HashSet<SymId>,
|
|
) -> Result<Value> {
|
|
todo!()
|
|
}
|
|
}
|
|
|
|
/// Shared evaluation state: symbol interner, global scope, source stack,
/// store connection, and the global string/constant pools used by the
/// bytecode compiler.
pub struct Context {
    // Interner mapping identifier text to `SymId`s.
    symbols: DefaultStringInterner,
    // Top-level scope: `builtins`, free-standing builtins, and constants.
    global: HashMap<SymId, Ir<'static, RawIrRef<'static>>>,
    // Stack of sources being processed; the last entry is "current".
    sources: Vec<Source>,
    // Connection to the Nix daemon store.
    store: DaemonStore,
    // (source index, span) pairs registered during compilation.
    // `UnsafeCell` because `register_span` takes `&self`; see the FIXME there.
    spans: UnsafeCell<Vec<(usize, TextRange)>>,
    // Total number of thunk ids handed out so far.
    thunk_count: usize,
    // Global string pool plus reverse map for interning.
    global_strings: Vec<String>,
    global_string_map: HashMap<String, u32>,
    // Global constant pool plus reverse map for interning.
    global_constants: Vec<Constant>,
    global_constant_map: HashMap<Constant, u32>,
    // Counts of strings/constants already synced (the sync target is not
    // visible in this file — TODO confirm, presumably a VM or worker).
    synced_strings: usize,
    synced_constants: usize,
}
|
|
|
|
/// Owns the bump allocator and a read-only reference into it.
///
/// # Safety
/// The `ir` field points into `_bump`'s storage. We use `'static` as a sentinel
/// lifetime because the struct owns the backing memory. The `as_ref` method
/// re-binds the lifetime to `&self`, preventing use-after-free.
struct OwnedIr {
    // Backing arena; kept alive solely so `ir` remains valid.
    _bump: Bump,
    // Points into `_bump`; the 'static lifetime is a sentinel (see above).
    ir: RawIrRef<'static>,
}
|
|
|
|
impl OwnedIr {
    /// Returns the stored IR root with its lifetime re-bound to `&self`,
    /// so it cannot outlive the arena that owns its storage.
    fn as_ref(&self) -> RawIrRef<'_> {
        self.ir
    }
}
|
|
|
|
impl Context {
|
|
pub fn new() -> Result<Self> {
|
|
let mut symbols = DefaultStringInterner::new();
|
|
let mut global = HashMap::new();
|
|
let builtins_sym = symbols.get_or_intern("builtins");
|
|
global.insert(builtins_sym, Ir::Builtins);
|
|
|
|
let free_globals = [
|
|
"abort",
|
|
"baseNameOf",
|
|
"break",
|
|
"dirOf",
|
|
"derivation",
|
|
"derivationStrict",
|
|
"fetchGit",
|
|
"fetchMercurial",
|
|
"fetchTarball",
|
|
"fetchTree",
|
|
"fromTOML",
|
|
"import",
|
|
"isNull",
|
|
"map",
|
|
"placeholder",
|
|
"removeAttrs",
|
|
"scopedImport",
|
|
"throw",
|
|
"toString",
|
|
];
|
|
let consts = [
|
|
("true", Ir::Bool(true)),
|
|
("false", Ir::Bool(false)),
|
|
("null", Ir::Null),
|
|
];
|
|
|
|
for name in free_globals {
|
|
let name = symbols.get_or_intern(name);
|
|
let value = Ir::Builtin(name);
|
|
global.insert(name, value);
|
|
}
|
|
for (name, value) in consts {
|
|
let name = symbols.get_or_intern(name);
|
|
global.insert(name, value);
|
|
}
|
|
|
|
let config = StoreConfig::from_env();
|
|
let store = DaemonStore::connect(&config.daemon_socket)?;
|
|
|
|
Ok(Self {
|
|
symbols,
|
|
global,
|
|
sources: Vec::new(),
|
|
store,
|
|
spans: UnsafeCell::new(Vec::new()),
|
|
thunk_count: 0,
|
|
global_strings: Vec::new(),
|
|
global_string_map: HashMap::new(),
|
|
global_constants: Vec::new(),
|
|
global_constant_map: HashMap::new(),
|
|
synced_strings: 0,
|
|
synced_constants: 0,
|
|
})
|
|
}
|
|
|
|
fn downgrade_ctx<'ctx, 'id, 'ir>(
|
|
&'ctx mut self,
|
|
bump: &'ir Bump,
|
|
token: GhostToken<'id>,
|
|
extra_scope: Option<Scope<'ctx>>,
|
|
) -> DowngradeCtx<'ctx, 'id, 'ir> {
|
|
let source = self.get_current_source();
|
|
DowngradeCtx::new(
|
|
bump,
|
|
token,
|
|
&mut self.symbols,
|
|
&self.global,
|
|
extra_scope,
|
|
&mut self.thunk_count,
|
|
source,
|
|
)
|
|
}
|
|
|
|
fn get_current_dir(&self) -> &Path {
|
|
self.sources
|
|
.last()
|
|
.as_ref()
|
|
.expect("current_source is not set")
|
|
.get_dir()
|
|
}
|
|
|
|
fn get_current_source(&self) -> Source {
|
|
self.sources
|
|
.last()
|
|
.expect("current_source is not set")
|
|
.clone()
|
|
}
|
|
|
|
fn downgrade<'ctx>(
|
|
&'ctx mut self,
|
|
source: Source,
|
|
extra_scope: Option<Scope<'ctx>>,
|
|
) -> Result<OwnedIr> {
|
|
tracing::debug!("Parsing Nix expression");
|
|
|
|
self.sources.push(source.clone());
|
|
|
|
let root = rnix::Root::parse(&source.src);
|
|
handle_parse_error(root.errors(), source).map_or(Ok(()), Err)?;
|
|
|
|
tracing::debug!("Downgrading Nix expression");
|
|
let expr = root
|
|
.tree()
|
|
.expr()
|
|
.ok_or_else(|| Error::parse_error("unexpected EOF".into()))?;
|
|
let bump = Bump::new();
|
|
GhostToken::new(|token| {
|
|
let ir = self
|
|
.downgrade_ctx(&bump, token, extra_scope)
|
|
.downgrade_toplevel(expr)?;
|
|
let ir = unsafe { std::mem::transmute::<RawIrRef<'_>, RawIrRef<'static>>(ir) };
|
|
Ok(OwnedIr { _bump: bump, ir })
|
|
})
|
|
}
|
|
|
|
pub fn compile_bytecode(&mut self, source: Source) -> Result<Bytecode> {
|
|
let root = self.downgrade(source, None)?;
|
|
tracing::debug!("Generating bytecode");
|
|
let bytecode = bytecode::compile_bytecode(root.as_ref(), self);
|
|
tracing::debug!("Compiled bytecode: {:#04X?}", bytecode.code);
|
|
Ok(bytecode)
|
|
}
|
|
|
|
pub fn get_store_dir(&self) -> &str {
|
|
self.store.get_store_dir()
|
|
}
|
|
}
|
|
|
|
impl BytecodeContext for Context {
|
|
fn intern_string(&mut self, s: &str) -> u32 {
|
|
if let Some(&idx) = self.global_string_map.get(s) {
|
|
return idx;
|
|
}
|
|
let idx = self.global_strings.len() as u32;
|
|
self.global_strings.push(s.to_string());
|
|
self.global_string_map.insert(s.to_string(), idx);
|
|
idx
|
|
}
|
|
|
|
fn intern_constant(&mut self, c: Constant) -> u32 {
|
|
if let Some(&idx) = self.global_constant_map.get(&c) {
|
|
return idx;
|
|
}
|
|
let idx = self.global_constants.len() as u32;
|
|
self.global_constants.push(c.clone());
|
|
self.global_constant_map.insert(c, idx);
|
|
idx
|
|
}
|
|
|
|
fn register_span(&self, range: TextRange) -> u32 {
|
|
// FIXME: SAFETY
|
|
let spans = unsafe { &mut *self.spans.get() };
|
|
let id = spans.len();
|
|
let source_id = self
|
|
.sources
|
|
.len()
|
|
.checked_sub(1)
|
|
.expect("current_source not set");
|
|
spans.push((source_id, range));
|
|
id as u32
|
|
}
|
|
|
|
fn get_sym(&self, id: SymId) -> &str {
|
|
self.symbols.resolve(id).expect("SymId out of bounds")
|
|
}
|
|
|
|
fn get_current_dir(&self) -> &Path {
|
|
Context::get_current_dir(self)
|
|
}
|
|
}
|
|
|
|
impl DisassemblerContext for Context {
|
|
fn lookup_string(&self, id: u32) -> &str {
|
|
self.global_strings
|
|
.get(id as usize)
|
|
.expect("string not found")
|
|
}
|
|
fn lookup_constant(&self, id: u32) -> &Constant {
|
|
self.global_constants
|
|
.get(id as usize)
|
|
.expect("constant not found")
|
|
}
|
|
}
|
|
|
|
/// One layer of the lexical scope stack used while downgrading.
enum Scope<'ctx> {
    /// The shared global scope (builtins and constants).
    Global(&'ctx HashMap<SymId, Ir<'static, RawIrRef<'static>>>),
    /// Names bound interactively in a REPL session.
    Repl(&'ctx HashSet<SymId>),
    /// Names injected by `scopedImport`.
    ScopedImport(HashSet<SymId>),
    /// `let` bindings, mapped to their reserved thunk ids.
    Let(HashMap<SymId, ThunkId>),
    /// A single lambda parameter bound to its argument slot.
    Param(SymId, ArgId),
}
|
|
|
|
/// RAII guard that pops the most recently pushed `Scope` from `ctx.scopes`
/// when dropped (see the `Drop` impl below).
struct ScopeGuard<'a, 'ctx, 'id, 'ir> {
    ctx: &'a mut DowngradeCtx<'ctx, 'id, 'ir>,
}
|
|
|
|
impl Drop for ScopeGuard<'_, '_, '_, '_> {
    /// Pops the scope that was pushed when this guard was created — runs
    /// even if the guarded closure returns early or unwinds.
    fn drop(&mut self) {
        self.ctx.scopes.pop();
    }
}
|
|
|
|
impl<'id, 'ir, 'ctx> ScopeGuard<'_, 'ctx, 'id, 'ir> {
    /// Reborrows the guarded context so the caller can keep using it while
    /// the guard stays alive (and the scope stays pushed).
    fn as_ctx(&mut self) -> &mut DowngradeCtx<'ctx, 'id, 'ir> {
        self.ctx
    }
}
|
|
|
|
/// Thunk bindings created within one lexical region, plus a content cache
/// for deduplicating structurally equal IR nodes.
struct ThunkScope<'id, 'ir> {
    // (id, node) pairs in creation order, allocated in the bump arena.
    bindings: bumpalo::collections::Vec<'ir, (ThunkId, IrRef<'id, 'ir>)>,
    // Content-addressed index over the bound nodes, for deduplication.
    cache: HashTable<(IrRef<'id, 'ir>, ThunkId)>,
    // Hasher used for `cache` entries (raw `HashTable` stores no hasher).
    hasher: DefaultHashBuilder,
}
|
|
|
|
impl<'id, 'ir> ThunkScope<'id, 'ir> {
    /// Creates an empty scope whose bindings vector lives in `bump`.
    fn new_in(bump: &'ir Bump) -> Self {
        Self {
            bindings: bumpalo::collections::Vec::new_in(bump),
            cache: HashTable::new(),
            hasher: DefaultHashBuilder::default(),
        }
    }

    /// Looks up a structurally equal IR node in the dedup cache, returning
    /// its thunk id if one is already registered.
    fn lookup_cache(&self, key: IrRef<'id, 'ir>, token: &GhostToken<'id>) -> Option<ThunkId> {
        let hash = self.hasher.hash_one(IrKey(key, token));
        self.cache
            .find(hash, |&(ir, _)| ir_content_eq(key, ir, token))
            .map(|&(_, id)| id)
    }

    /// Registers `ir` under thunk `id`: records the binding and adds it to
    /// the dedup cache.
    fn add_binding(&mut self, id: ThunkId, ir: IrRef<'id, 'ir>, token: &GhostToken<'id>) {
        self.bindings.push((id, ir));
        let hash = self.hasher.hash_one(IrKey(ir, token));
        // The trailing closure re-hashes existing entries if the table
        // needs to grow during insertion.
        self.cache.insert_unique(hash, (ir, id), |&(ir, _)| {
            self.hasher.hash_one(IrKey(ir, token))
        });
    }

    /// Appends bindings without touching the dedup cache (used for `let`
    /// bindings whose ids were reserved up front in `with_let_scope`).
    fn extend_bindings(&mut self, iter: impl IntoIterator<Item = (ThunkId, IrRef<'id, 'ir>)>) {
        self.bindings.extend(iter);
    }
}
|
|
|
|
/// Mutable state threaded through the AST → IR downgrade pass.
struct DowngradeCtx<'ctx, 'id, 'ir> {
    // Arena all IR nodes are allocated in.
    bump: &'ir Bump,
    // Ghost token granting access to the IR's `GhostCell`s.
    token: GhostToken<'id>,
    // Shared symbol interner borrowed from `Context`.
    symbols: &'ctx mut DefaultStringInterner,
    // Source currently being downgraded (for error reporting).
    source: Source,
    // Lexical scope stack; the innermost scope is last.
    scopes: Vec<Scope<'ctx>>,
    // Number of enclosing `with` expressions.
    with_scope_count: usize,
    // Next function-argument slot id to hand out.
    arg_count: usize,
    // Shared counter for allocating globally unique thunk ids.
    thunk_count: &'ctx mut usize,
    // Stack of thunk scopes; new bindings land in the innermost one.
    thunk_scopes: Vec<ThunkScope<'id, 'ir>>,
}
|
|
|
|
fn should_thunk<'id>(ir: IrRef<'id, '_>, token: &GhostToken<'id>) -> bool {
|
|
!matches!(
|
|
ir.borrow(token),
|
|
Ir::Builtin(_)
|
|
| Ir::Builtins
|
|
| Ir::Int(_)
|
|
| Ir::Float(_)
|
|
| Ir::Bool(_)
|
|
| Ir::Null
|
|
| Ir::Str(_)
|
|
| Ir::Thunk(_)
|
|
)
|
|
}
|
|
|
|
impl<'ctx, 'id, 'ir> DowngradeCtx<'ctx, 'id, 'ir> {
|
|
fn new(
|
|
bump: &'ir Bump,
|
|
token: GhostToken<'id>,
|
|
symbols: &'ctx mut DefaultStringInterner,
|
|
global: &'ctx HashMap<SymId, Ir<'static, RawIrRef<'static>>>,
|
|
extra_scope: Option<Scope<'ctx>>,
|
|
thunk_count: &'ctx mut usize,
|
|
source: Source,
|
|
) -> Self {
|
|
Self {
|
|
bump,
|
|
token,
|
|
symbols,
|
|
source,
|
|
scopes: std::iter::once(Scope::Global(global))
|
|
.chain(extra_scope)
|
|
.collect(),
|
|
thunk_count,
|
|
arg_count: 0,
|
|
with_scope_count: 0,
|
|
thunk_scopes: vec![ThunkScope::new_in(bump)],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'ctx: 'ir, 'id, 'ir> DowngradeContext<'id, 'ir> for DowngradeCtx<'ctx, 'id, 'ir> {
    /// Allocates a fresh IR node in the bump arena.
    fn new_expr(&self, expr: Ir<'ir, IrRef<'id, 'ir>>) -> IrRef<'id, 'ir> {
        IrRef::new(self.bump.alloc(GhostCell::new(expr)))
    }

    /// Hands out the next function-argument slot id.
    fn new_arg(&mut self) -> ArgId {
        self.arg_count += 1;
        ArgId(self.arg_count - 1)
    }

    /// Wraps `ir` in a thunk unless it is a cheap leaf node, deduplicating
    /// structurally equal nodes through the innermost thunk scope's cache.
    fn maybe_thunk(&mut self, ir: IrRef<'id, 'ir>) -> IrRef<'id, 'ir> {
        if !should_thunk(ir, &self.token) {
            return ir;
        }

        // Reuse the existing thunk id if an equal node was already thunked
        // in the current scope.
        let cached = self
            .thunk_scopes
            .last()
            .expect("no active cache scope")
            .lookup_cache(ir, &self.token);

        if let Some(id) = cached {
            return IrRef::alloc(self.bump, Ir::Thunk(id));
        }

        // Otherwise allocate a new id and register the binding.
        let id = ThunkId(*self.thunk_count);
        *self.thunk_count = self.thunk_count.checked_add(1).expect("thunk id overflow");
        self.thunk_scopes
            .last_mut()
            .expect("no active cache scope")
            .add_binding(id, ir, &self.token);
        IrRef::alloc(self.bump, Ir::Thunk(id))
    }

    /// Interns `sym` and returns its id.
    fn new_sym(&mut self, sym: String) -> SymId {
        self.symbols.get_or_intern(sym)
    }

    /// Resolves a symbol id back to its text.
    ///
    /// # Panics
    /// Panics if `id` was never interned.
    fn get_sym(&self, id: SymId) -> Symbol<'_> {
        self.symbols.resolve(id).expect("no symbol found").into()
    }

    /// Resolves `sym` against the scope stack, innermost scope first.
    ///
    /// Falls back to a dynamic `with` lookup when at least one `with` scope
    /// is active; otherwise reports an undefined-variable error at `span`.
    fn lookup(&self, sym: SymId, span: TextRange) -> Result<IrRef<'id, 'ir>> {
        for scope in self.scopes.iter().rev() {
            match scope {
                &Scope::Global(global_scope) => {
                    if let Some(expr) = global_scope.get(&sym) {
                        // Globals are leaf nodes, so they can be copied out
                        // of the 'static map into a freshly allocated node.
                        let ir = match expr {
                            Ir::Builtins => Ir::Builtins,
                            Ir::Builtin(s) => Ir::Builtin(*s),
                            Ir::Bool(b) => Ir::Bool(*b),
                            Ir::Null => Ir::Null,
                            _ => unreachable!("globals should only contain leaf IR nodes"),
                        };
                        return Ok(self.new_expr(ir));
                    }
                }
                &Scope::Repl(repl_bindings) => {
                    if repl_bindings.contains(&sym) {
                        return Ok(self.new_expr(Ir::ReplBinding(sym)));
                    }
                }
                Scope::ScopedImport(scoped_bindings) => {
                    if scoped_bindings.contains(&sym) {
                        return Ok(self.new_expr(Ir::ScopedImportBinding(sym)));
                    }
                }
                Scope::Let(let_scope) => {
                    if let Some(&expr) = let_scope.get(&sym) {
                        return Ok(self.new_expr(Ir::Thunk(expr)));
                    }
                }
                &Scope::Param(param_sym, id) => {
                    if param_sym == sym {
                        return Ok(self.new_expr(Ir::Arg(id)));
                    }
                }
            }
        }

        if self.with_scope_count > 0 {
            Ok(self.new_expr(Ir::WithLookup(sym)))
        } else {
            Err(Error::downgrade_error(
                format!("'{}' not found", self.get_sym(sym)),
                self.get_current_source(),
                span,
            ))
        }
    }

    /// Clone of the source being downgraded.
    fn get_current_source(&self) -> Source {
        self.source.clone()
    }

    /// Runs `f` with a `let` scope binding `keys` to freshly reserved thunk
    /// ids. `f` must return exactly one value per key (asserted); the values
    /// are then recorded as bindings in the enclosing thunk scope.
    fn with_let_scope<F, R>(&mut self, keys: &[SymId], f: F) -> Result<R>
    where
        F: FnOnce(&mut Self) -> Result<(bumpalo::collections::Vec<'ir, IrRef<'id, 'ir>>, R)>,
    {
        // Reserve the contiguous id range [base, base + keys.len()).
        let base = *self.thunk_count;
        *self.thunk_count = self
            .thunk_count
            .checked_add(keys.len())
            .expect("thunk id overflow");
        let iter = keys.iter().enumerate().map(|(offset, &key)| {
            (
                key,
                // SAFETY: offset < keys.len(), and the checked_add above
                // proved that base + keys.len() does not overflow, so
                // base + offset cannot overflow either.
                ThunkId(unsafe { base.checked_add(offset).unwrap_unchecked() }),
            )
        });
        self.scopes.push(Scope::Let(iter.collect()));
        let (vals, ret) = {
            // Guard pops the `let` scope even if `f` fails.
            let mut guard = ScopeGuard { ctx: self };
            f(guard.as_ctx())?
        };
        assert_eq!(keys.len(), vals.len());
        let scope = self.thunk_scopes.last_mut().expect("no active thunk scope");
        scope.extend_bindings((base..base + keys.len()).map(ThunkId).zip(vals));
        Ok(ret)
    }

    /// Runs `f` with `param` bound to argument slot `arg`.
    fn with_param_scope<F, R>(&mut self, param: SymId, arg: ArgId, f: F) -> R
    where
        F: FnOnce(&mut Self) -> R,
    {
        self.scopes.push(Scope::Param(param, arg));
        // Guard pops the scope when `f` returns (or unwinds).
        let mut guard = ScopeGuard { ctx: self };
        f(guard.as_ctx())
    }

    /// Runs `f` with one additional `with` scope active, enabling dynamic
    /// `WithLookup` fallbacks in `lookup`.
    fn with_with_scope<F, R>(&mut self, f: F) -> R
    where
        F: FnOnce(&mut Self) -> R,
    {
        self.with_scope_count += 1;
        let ret = f(self);
        self.with_scope_count -= 1;
        ret
    }

    /// Runs `f` inside a fresh thunk scope and returns its result together
    /// with the bindings created while that scope was active.
    fn with_thunk_scope<F, R>(
        &mut self,
        f: F,
    ) -> (
        R,
        bumpalo::collections::Vec<'ir, (ThunkId, IrRef<'id, 'ir>)>,
    )
    where
        F: FnOnce(&mut Self) -> R,
    {
        self.thunk_scopes.push(ThunkScope::new_in(self.bump));
        let ret = f(self);
        (
            ret,
            self.thunk_scopes
                .pop()
                .expect("no thunk scope left???")
                .bindings,
        )
    }

    /// The bump arena backing all IR allocations.
    fn bump(&self) -> &'ir bumpalo::Bump {
        self.bump
    }
}
|
|
|
|
impl<'id, 'ir, 'ctx: 'ir> DowngradeCtx<'ctx, 'id, 'ir> {
    /// Downgrades the root expression, wraps it in `Ir::TopLevel` together
    /// with the root thunk scope's bindings, and freezes the result by
    /// consuming the ghost token (so the cells cannot be mutated afterwards).
    fn downgrade_toplevel(mut self, root: rnix::ast::Expr) -> Result<RawIrRef<'ir>> {
        let body = root.downgrade(&mut self)?;
        // Take the root thunk scope that `new` created.
        let thunks = self
            .thunk_scopes
            .pop()
            .expect("no thunk scope left???")
            .bindings;
        let ir = IrRef::alloc(self.bump, Ir::TopLevel { body, thunks });
        Ok(ir.freeze(self.token))
    }
}
|