feat: regex related builtins

This commit is contained in:
2026-01-16 10:08:29 +08:00
parent e620f39a4a
commit 97e5e7b995
5 changed files with 507 additions and 17 deletions

View File

@@ -175,6 +175,8 @@ export const builtins: any = {
substring: mkPrimop(string.substring, "substring", 3), substring: mkPrimop(string.substring, "substring", 3),
concatStringsSep: mkPrimop(string.concatStringsSep, "concatStringsSep", 2), concatStringsSep: mkPrimop(string.concatStringsSep, "concatStringsSep", 2),
baseNameOf: mkPrimop(string.baseNameOf, "baseNameOf", 1), baseNameOf: mkPrimop(string.baseNameOf, "baseNameOf", 1),
match: mkPrimop(string.match, "match", 2),
split: mkPrimop(string.split, "split", 2),
seq: mkPrimop(functional.seq, "seq", 2), seq: mkPrimop(functional.seq, "seq", 2),
deepSeq: mkPrimop(functional.deepSeq, "deepSeq", 2), deepSeq: mkPrimop(functional.deepSeq, "deepSeq", 2),
@@ -234,14 +236,12 @@ export const builtins: any = {
functionArgs: mkPrimop(misc.functionArgs, "functionArgs", 1), functionArgs: mkPrimop(misc.functionArgs, "functionArgs", 1),
genericClosure: mkPrimop(misc.genericClosure, "genericClosure", 1), genericClosure: mkPrimop(misc.genericClosure, "genericClosure", 1),
getFlake: mkPrimop(misc.getFlake, "getFlake", 1), getFlake: mkPrimop(misc.getFlake, "getFlake", 1),
match: mkPrimop(misc.match, "match", 2),
outputOf: mkPrimop(misc.outputOf, "outputOf", 2), outputOf: mkPrimop(misc.outputOf, "outputOf", 2),
parseDrvName: mkPrimop(misc.parseDrvName, "parseDrvName", 1), parseDrvName: mkPrimop(misc.parseDrvName, "parseDrvName", 1),
parseFlakeName: mkPrimop(misc.parseFlakeName, "parseFlakeName", 1), parseFlakeName: mkPrimop(misc.parseFlakeName, "parseFlakeName", 1),
parseFlakeRef: mkPrimop(misc.parseFlakeRef, "parseFlakeRef", 1), parseFlakeRef: mkPrimop(misc.parseFlakeRef, "parseFlakeRef", 1),
placeholder: mkPrimop(misc.placeholder, "placeholder", 1), placeholder: mkPrimop(misc.placeholder, "placeholder", 1),
replaceStrings: mkPrimop(misc.replaceStrings, "replaceStrings", 3), replaceStrings: mkPrimop(misc.replaceStrings, "replaceStrings", 3),
split: mkPrimop(misc.split, "split", 2),
splitVersion: mkPrimop(misc.splitVersion, "splitVersion", 1), splitVersion: mkPrimop(misc.splitVersion, "splitVersion", 1),
traceVerbose: mkPrimop(misc.traceVerbose, "traceVerbose", 2), traceVerbose: mkPrimop(misc.traceVerbose, "traceVerbose", 2),
tryEval: mkPrimop(misc.tryEval, "tryEval", 1), tryEval: mkPrimop(misc.tryEval, "tryEval", 1),
@@ -259,6 +259,6 @@ export const builtins: any = {
langVersion: 6, langVersion: 6,
nixPath: [], nixPath: [],
nixVersion: "NIX_JS_VERSION", nixVersion: "2.31.2",
storeDir: "/nix/store", storeDir: "/nix/store",
}; };

View File

@@ -160,12 +160,6 @@ export const getFlake = (attrs: NixValue): never => {
throw new Error("Not implemented: getFlake"); throw new Error("Not implemented: getFlake");
}; };
export const match =
(regex: NixValue) =>
(str: NixValue): never => {
throw new Error("Not implemented: match");
};
export const outputOf = export const outputOf =
(drv: NixValue) => (drv: NixValue) =>
(out: NixValue): never => { (out: NixValue): never => {
@@ -191,16 +185,77 @@ export const placeholder = (output: NixValue): never => {
export const replaceStrings = export const replaceStrings =
(from: NixValue) => (from: NixValue) =>
(to: NixValue) => (to: NixValue) =>
(s: NixValue): never => { (s: NixValue): NixValue => {
throw new Error("Not implemented: replaceStrings"); const fromList = forceList(from);
const toList = forceList(to);
const inputStr = forceString(s);
if (fromList.length !== toList.length) {
throw new Error(
"'from' and 'to' arguments passed to builtins.replaceStrings have different lengths"
);
}
const toCache = new Map<number, string>();
let result = "";
let pos = 0;
while (pos <= inputStr.length) {
let found = false;
for (let i = 0; i < fromList.length; i++) {
const pattern = forceString(fromList[i]);
if (inputStr.substring(pos).startsWith(pattern)) {
found = true;
if (!toCache.has(i)) {
toCache.set(i, forceString(toList[i]));
}
const replacement = toCache.get(i)!;
result += replacement;
if (pattern.length === 0) {
if (pos < inputStr.length) {
result += inputStr[pos];
}
pos++;
} else {
pos += pattern.length;
}
break;
}
}
if (!found) {
if (pos < inputStr.length) {
result += inputStr[pos];
}
pos++;
}
}
return result;
}; };
export const split = (regex: NixValue, str: NixValue): never => {
throw new Error("Not implemented: split");
};
export const splitVersion = (s: NixValue): never => { export const splitVersion = (s: NixValue): NixValue => {
throw new Error("Not implemented: splitVersion"); const version = forceString(s);
const components: string[] = [];
let idx = 0;
while (idx < version.length) {
const result = nextComponent(version, idx);
if (result.component === "") {
break;
}
components.push(result.component);
idx = result.nextIndex;
}
return components;
}; };
export const traceVerbose = (e1: NixValue, e2: NixValue): never => { export const traceVerbose = (e1: NixValue, e2: NixValue): never => {

View File

@@ -42,3 +42,122 @@ export const baseNameOf = (x: NixValue): string => {
return str.substring(pos, last + 1); return str.substring(pos, last + 1);
}; };
/**
 * Bodies of JavaScript character sets (the text that goes between `[` and `]`)
 * for the POSIX named character classes, keyed by class name.
 */
const POSIX_CLASSES: Record<string, string> = {
  alnum: "a-zA-Z0-9",
  alpha: "a-zA-Z",
  blank: " \\t",
  cntrl: "\\x00-\\x1f\\x7f",
  digit: "0-9",
  graph: "\\x21-\\x7e",
  lower: "a-z",
  print: "\\x20-\\x7e",
  upper: "A-Z",
  space: "\\s",
  xdigit: "0-9A-Fa-f",
  punct: "\\-!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~",
};

/**
 * Returns the character-set body for a POSIX class name.
 *
 * Only own properties of `POSIX_CLASSES` count, so names such as
 * `constructor` or `toString` are rejected instead of resolving to members
 * inherited from `Object.prototype`.
 *
 * @throws Error if `className` is not a known POSIX character class.
 */
function posixClassBody(className: string): string {
  const body = Object.prototype.hasOwnProperty.call(POSIX_CLASSES, className)
    ? POSIX_CLASSES[className]
    : undefined;
  if (body === undefined) {
    throw new Error(`Unknown POSIX character class: ${className}`);
  }
  return body;
}

/**
 * Translates a regex that uses POSIX bracket classes (`[[:space:]]`,
 * `[^[:alpha:][:digit:]]`, `[a-z[:punct:]]`, ...) into a JavaScript `RegExp`
 * compiled with the `u` flag.
 *
 * @param pattern   The source pattern.
 * @param fullMatch When true, the resulting expression only matches if it
 *                  covers the whole input string.
 * @returns The compiled regular expression.
 * @throws Error on an unknown POSIX class name; `SyntaxError` if the translated
 *         pattern is not a valid JavaScript regular expression.
 */
function posixToJsRegex(pattern: string, fullMatch: boolean = false): RegExp {
  // Pass 1: bracket expressions made up solely of named classes, optionally
  // negated. The negation marker comes from the capture group, not from
  // indexing into the matched text.
  let jsPattern = pattern.replace(
    /\[(\^?)(?:\[:(\w+):\])+\]/g,
    (whole: string, negation: string) => {
      const body = Array.from(whole.matchAll(/\[:(\w+):\]/g), (m) =>
        posixClassBody(m[1]),
      ).join("");
      return `[${negation}${body}]`;
    },
  );

  // Pass 2: named classes that sit next to other items inside a bracket
  // expression, e.g. `[a-z[:digit:]]`.
  jsPattern = jsPattern.replace(
    /\[:(\w+):\]/g,
    (_whole: string, className: string) => posixClassBody(className),
  );

  if (fullMatch) {
    // Anchor through a non-capturing group so that the anchors apply to the
    // whole pattern: a bare prefix/suffix would bind only to the first and last
    // alternative of `a|b`, and a pattern ending in an escaped `\$` must still
    // receive an end anchor. Capture-group numbering is unaffected.
    jsPattern = `^(?:${jsPattern})$`;
  }

  return new RegExp(jsPattern, "u");
}
/**
 * `builtins.match regex str`: matches `str` against `regex` as a whole.
 *
 * @returns `null` when the string does not match; otherwise a list with one
 *          entry per capture group, where groups that did not participate in
 *          the match are `null`.
 * @throws Error prefixed with "Invalid regular expression" when translating,
 *         compiling or running the pattern fails.
 */
export const match =
  (regex: NixValue) =>
  (str: NixValue): NixValue => {
    const regexStr = forceString(regex);
    const inputStr = forceString(str);

    try {
      const found = posixToJsRegex(regexStr, true).exec(inputStr);
      if (found === null) {
        return null;
      }
      // Drop the whole-match entry at index 0 and keep only the capture groups.
      const captures: NixValue[] = found
        .slice(1)
        .map((capture) => (capture === undefined ? null : capture));
      return captures;
    } catch (e) {
      throw new Error(`Invalid regular expression '${regexStr}': ${e}`);
    }
  };
/**
 * `builtins.split regex str`: splits `str` at every match of `regex`.
 *
 * @returns A list that alternates between the unmatched pieces of the input
 *          (strings) and, for every match, the list of its capture groups
 *          (`null` for groups that did not participate). The list always starts
 *          and ends with a string; an input without any match yields
 *          `[str]`.
 * @throws Error prefixed with "Invalid regular expression" when translating,
 *         compiling or running the pattern fails.
 */
export const split =
  (regex: NixValue) =>
  (str: NixValue): NixValue => {
    const regexStr = forceString(regex);
    const inputStr = forceString(str);
    try {
      const re = posixToJsRegex(regexStr);
      const reGlobal = new RegExp(re.source, re.flags + "g");
      const result: NixValue[] = [];
      // End offset of the previous match, i.e. where the next unmatched piece starts.
      let lastIndex = 0;
      let match: RegExpExecArray | null;
      while ((match = reGlobal.exec(inputStr)) !== null) {
        result.push(inputStr.substring(lastIndex, match.index));
        const groups: NixValue[] = [];
        for (let i = 1; i < match.length; i++) {
          groups.push(match[i] !== undefined ? match[i] : null);
        }
        result.push(groups);
        lastIndex = match.index + match[0].length;
        if (match[0].length === 0) {
          // A zero-width match does not move the regex forward, so step past
          // the current character manually. The regex is compiled with the
          // `u` flag; advance by a whole code point so the cursor never lands
          // between the two halves of a surrogate pair.
          const codePoint = inputStr.codePointAt(reGlobal.lastIndex);
          reGlobal.lastIndex += codePoint !== undefined && codePoint > 0xffff ? 2 : 1;
        }
      }
      // The trailing piece is always appended. With no match at all this makes
      // the result `[inputStr]`; a zero-width match at offset 0 (for example
      // regex "^") still keeps its match entry instead of being mistaken for
      // "no match".
      result.push(inputStr.substring(lastIndex));
      return result;
    } catch (e) {
      throw new Error(`Invalid regular expression '${regexStr}': ${e}`);
    }
  };

View File

@@ -78,7 +78,7 @@ impl<Ctx: CodegenContext> Compile<Ctx> for Ir {
&Ir::Assert(Assert { assertion, expr }) => { &Ir::Assert(Assert { assertion, expr }) => {
let assertion = ctx.get_ir(assertion).compile(ctx); let assertion = ctx.get_ir(assertion).compile(ctx);
let expr = ctx.get_ir(expr).compile(ctx); let expr = ctx.get_ir(expr).compile(ctx);
format!("({assertion})?({expr}):(()=>{{throw \"assertion failed\"}})()") format!("({assertion})?({expr}):(()=>{{throw new Error(\"assertion failed\")}})()")
} }
} }
} }

316
nix-js/tests/regex.rs Normal file
View File

@@ -0,0 +1,316 @@
mod utils;
use nix_js::value::{List, Value};
use utils::eval;
/// A pattern without capture groups that equals the whole input yields an empty list.
#[test]
fn test_match_exact_full_string() {
    let actual = eval(r#"builtins.match "foobar" "foobar""#);
    let expected = Value::List(List::new(vec![]));
    assert_eq!(actual, expected);
}
/// A pattern that only covers a prefix of the input does not match.
#[test]
fn test_match_partial_returns_null() {
    let actual = eval(r#"builtins.match "foo" "foobar""#);
    assert_eq!(actual, Value::Null);
}
/// A single capture group is returned as the only list element.
#[test]
fn test_match_with_capture_groups() {
    let actual = eval(r#"builtins.match "(.*)\\.nix" "foobar.nix""#);
    let expected = Value::List(List::new(vec![Value::String("foobar".into())]));
    assert_eq!(actual, expected);
}
/// Capture groups inside an optional part that did not participate come back as null.
#[test]
fn test_match_multiple_capture_groups() {
    let actual = eval(r#"builtins.match "((.*)/)?([^/]*)\\.nix" "foobar.nix""#);
    let groups = vec![
        Value::Null,
        Value::Null,
        Value::String("foobar".into()),
    ];
    assert_eq!(actual, Value::List(List::new(groups)));
}
/// Nested capture groups are all populated when the optional directory part is present.
#[test]
fn test_match_with_path() {
    let actual = eval(r#"builtins.match "((.*)/)?([^/]*)\\.nix" "/path/to/foobar.nix""#);
    let groups = vec![
        Value::String("/path/to/".into()),
        Value::String("/path/to".into()),
        Value::String("foobar".into()),
    ];
    assert_eq!(actual, Value::List(List::new(groups)));
}
/// `[[:space:]]` and its negated form are understood inside bracket expressions.
#[test]
fn test_match_posix_space_class() {
    let actual = eval(r#"builtins.match "[[:space:]]+([^[:space:]]+)[[:space:]]+" " foo ""#);
    let expected = Value::List(List::new(vec![Value::String("foo".into())]));
    assert_eq!(actual, expected);
}
/// `[[:upper:]]` rejects lowercase text and captures uppercase text.
#[test]
fn test_match_posix_upper_class() {
    let lowercase = eval(r#"builtins.match "[[:space:]]+([[:upper:]]+)[[:space:]]+" " foo ""#);
    assert_eq!(lowercase, Value::Null);

    let uppercase = eval(r#"builtins.match "[[:space:]]+([[:upper:]]+)[[:space:]]+" " FOO ""#);
    let expected = Value::List(List::new(vec![Value::String("FOO".into())]));
    assert_eq!(uppercase, expected);
}
/// Exercises the `*`, `+` and `{m,n}` quantifiers, both matching and non-matching.
#[test]
fn test_match_quantifiers() {
    let star = eval(r#"builtins.match "fo*" "f""#);
    assert_eq!(star, Value::List(List::new(vec![])));

    let plus = eval(r#"builtins.match "fo+" "f""#);
    assert_eq!(plus, Value::Null);

    let bounded_hit = eval(r#"builtins.match "fo{1,2}" "foo""#);
    assert_eq!(bounded_hit, Value::List(List::new(vec![])));

    let bounded_miss = eval(r#"builtins.match "fo{1,2}" "fooo""#);
    assert_eq!(bounded_miss, Value::Null);
}
/// A match without capture groups contributes an empty list between two empty strings.
#[test]
fn test_split_non_capturing() {
    let actual = eval(r#"builtins.split "foobar" "foobar""#);
    let pieces = vec![
        Value::String("".into()),
        Value::List(List::new(vec![])),
        Value::String("".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
/// Without any match the result is a single-element list holding the input.
#[test]
fn test_split_no_match() {
    let actual = eval(r#"builtins.split "fo+" "f""#);
    let expected = Value::List(List::new(vec![Value::String("f".into())]));
    assert_eq!(actual, expected);
}
/// The captured text appears in the list that sits between the unmatched pieces.
#[test]
fn test_split_with_capture_group() {
    let actual = eval(r#"builtins.split "(fo*)" "foobar""#);
    let pieces = vec![
        Value::String("".into()),
        Value::List(List::new(vec![Value::String("foo".into())])),
        Value::String("bar".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
/// Every occurrence of the pattern produces its own capture list.
#[test]
fn test_split_multiple_matches() {
    let actual = eval(r#"builtins.split "(b)" "foobarbaz""#);
    let pieces = vec![
        Value::String("foo".into()),
        Value::List(List::new(vec![Value::String("b".into())])),
        Value::String("ar".into()),
        Value::List(List::new(vec![Value::String("b".into())])),
        Value::String("az".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
/// Several capture groups in one match are reported together in one list.
#[test]
fn test_split_with_multiple_groups() {
    let actual = eval(r#"builtins.split "(f)(o*)" "foo""#);
    let captures = Value::List(List::new(vec![
        Value::String("f".into()),
        Value::String("oo".into()),
    ]));
    let pieces = vec![
        Value::String("".into()),
        captures,
        Value::String("".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
/// In an alternation, the group of the branch that was not taken is null.
#[test]
fn test_split_with_optional_groups() {
    let actual = eval(r#"builtins.split "(a)|(c)" "abc""#);
    let pieces = vec![
        Value::String("".into()),
        Value::List(List::new(vec![Value::String("a".into()), Value::Null])),
        Value::String("b".into()),
        Value::List(List::new(vec![Value::Null, Value::String("c".into())])),
        Value::String("".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
/// `+` consumes the longest run, so each run of `o` is one match.
#[test]
fn test_split_greedy_matching() {
    let actual = eval(r#"builtins.split "(o+)" "oooofoooo""#);
    let pieces = vec![
        Value::String("".into()),
        Value::List(List::new(vec![Value::String("oooo".into())])),
        Value::String("f".into()),
        Value::List(List::new(vec![Value::String("oooo".into())])),
        Value::String("".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
/// POSIX classes are also accepted by `builtins.split`.
#[test]
fn test_split_posix_classes() {
    let actual = eval(r#"builtins.split "([[:upper:]]+)" " FOO ""#);
    let pieces = vec![
        Value::String(" ".into()),
        Value::List(List::new(vec![Value::String("FOO".into())])),
        Value::String(" ".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
/// Every occurrence of a single-character pattern is replaced.
#[test]
fn test_replace_basic() {
    let actual = eval(r#"builtins.replaceStrings ["o"] ["a"] "foobar""#);
    let expected = Value::String("faabar".into());
    assert_eq!(actual, expected);
}
/// Replacing with the empty string deletes the occurrences.
#[test]
fn test_replace_with_empty() {
    let actual = eval(r#"builtins.replaceStrings ["o"] [""] "foobar""#);
    let expected = Value::String("fbar".into());
    assert_eq!(actual, expected);
}
/// Replacement output is not rescanned, so swapped patterns do not cascade.
#[test]
fn test_replace_multiple_patterns() {
    let actual = eval(r#"builtins.replaceStrings ["oo" "a"] ["a" "oo"] "foobar""#);
    let expected = Value::String("faboor".into());
    assert_eq!(actual, expected);
}
/// When the same pattern is listed twice, the earlier entry's replacement is used.
#[test]
fn test_replace_first_match_wins() {
    let actual = eval(r#"builtins.replaceStrings ["oo" "oo"] ["u" "i"] "foobar""#);
    let expected = Value::String("fubar".into());
    assert_eq!(actual, expected);
}
/// An empty pattern inserts the replacement before every character and at the end.
#[test]
fn test_replace_empty_pattern() {
    let actual = eval(r#"builtins.replaceStrings [""] ["X"] "abc""#);
    let expected = Value::String("XaXbXcX".into());
    assert_eq!(actual, expected);
}
/// An empty pattern applied to an empty input inserts the replacement exactly once.
#[test]
fn test_replace_empty_pattern_empty_string() {
    let actual = eval(r#"builtins.replaceStrings [""] ["X"] """#);
    let expected = Value::String("X".into());
    assert_eq!(actual, expected);
}
/// A dash is replaced by an underscore.
#[test]
fn test_replace_simple_char() {
    let actual = eval(r#"builtins.replaceStrings ["-"] ["_"] "a-b""#);
    let expected = Value::String("a_b".into());
    assert_eq!(actual, expected);
}
/// A multi-character pattern is replaced by a shorter string.
#[test]
fn test_replace_longer_pattern() {
    let actual = eval(r#"builtins.replaceStrings ["oo"] ["u"] "foobar""#);
    let expected = Value::String("fubar".into());
    assert_eq!(actual, expected);
}
/// Evaluation panics when the `from` and `to` lists differ in length.
#[test]
fn test_replace_different_lengths() {
    let panicked = std::panic::catch_unwind(|| {
        eval(r#"builtins.replaceStrings ["a" "b"] ["x"] "test""#)
    })
    .is_err();
    assert!(panicked);
}
/// Dots separate the numeric components of a version.
#[test]
fn test_split_version_simple() {
    let actual = eval(r#"builtins.splitVersion "1.2.3""#);
    let components = vec![
        Value::String("1".into()),
        Value::String("2".into()),
        Value::String("3".into()),
    ];
    assert_eq!(actual, Value::List(List::new(components)));
}
/// A transition between digits and letters starts a new component without a separator.
#[test]
fn test_split_version_with_pre() {
    let actual = eval(r#"builtins.splitVersion "2.3.0pre1234""#);
    let components = vec![
        Value::String("2".into()),
        Value::String("3".into()),
        Value::String("0".into()),
        Value::String("pre".into()),
        Value::String("1234".into()),
    ];
    assert_eq!(actual, Value::List(List::new(components)));
}
/// A trailing letter becomes its own component.
#[test]
fn test_split_version_with_letters() {
    let actual = eval(r#"builtins.splitVersion "2.3a""#);
    let components = vec![
        Value::String("2".into()),
        Value::String("3".into()),
        Value::String("a".into()),
    ];
    assert_eq!(actual, Value::List(List::new(components)));
}
/// A dash acts as a separator just like a dot.
#[test]
fn test_split_version_with_dashes() {
    let actual = eval(r#"builtins.splitVersion "2.3-beta1""#);
    let components = vec![
        Value::String("2".into()),
        Value::String("3".into()),
        Value::String("beta".into()),
        Value::String("1".into()),
    ];
    assert_eq!(actual, Value::List(List::new(components)));
}
/// An empty version string has no components.
#[test]
fn test_split_version_empty() {
    let actual = eval(r#"builtins.splitVersion """#);
    let expected = Value::List(List::new(vec![]));
    assert_eq!(actual, expected);
}