From 97e5e7b995a6ccb0fe67bf3e6738a9bb97053268 Mon Sep 17 00:00:00 2001 From: imxyy_soope_ Date: Fri, 16 Jan 2026 10:08:29 +0800 Subject: [PATCH] feat: regex related builtins --- nix-js/runtime-ts/src/builtins/index.ts | 6 +- nix-js/runtime-ts/src/builtins/misc.ts | 81 +++++- nix-js/runtime-ts/src/builtins/string.ts | 119 +++++++++ nix-js/src/codegen.rs | 2 +- nix-js/tests/regex.rs | 316 +++++++++++++++++++++++ 5 files changed, 507 insertions(+), 17 deletions(-) create mode 100644 nix-js/tests/regex.rs diff --git a/nix-js/runtime-ts/src/builtins/index.ts b/nix-js/runtime-ts/src/builtins/index.ts index 7234b8d..e7b4bca 100644 --- a/nix-js/runtime-ts/src/builtins/index.ts +++ b/nix-js/runtime-ts/src/builtins/index.ts @@ -175,6 +175,8 @@ export const builtins: any = { substring: mkPrimop(string.substring, "substring", 3), concatStringsSep: mkPrimop(string.concatStringsSep, "concatStringsSep", 2), baseNameOf: mkPrimop(string.baseNameOf, "baseNameOf", 1), + match: mkPrimop(string.match, "match", 2), + split: mkPrimop(string.split, "split", 2), seq: mkPrimop(functional.seq, "seq", 2), deepSeq: mkPrimop(functional.deepSeq, "deepSeq", 2), @@ -234,14 +236,12 @@ export const builtins: any = { functionArgs: mkPrimop(misc.functionArgs, "functionArgs", 1), genericClosure: mkPrimop(misc.genericClosure, "genericClosure", 1), getFlake: mkPrimop(misc.getFlake, "getFlake", 1), - match: mkPrimop(misc.match, "match", 2), outputOf: mkPrimop(misc.outputOf, "outputOf", 2), parseDrvName: mkPrimop(misc.parseDrvName, "parseDrvName", 1), parseFlakeName: mkPrimop(misc.parseFlakeName, "parseFlakeName", 1), parseFlakeRef: mkPrimop(misc.parseFlakeRef, "parseFlakeRef", 1), placeholder: mkPrimop(misc.placeholder, "placeholder", 1), replaceStrings: mkPrimop(misc.replaceStrings, "replaceStrings", 3), - split: mkPrimop(misc.split, "split", 2), splitVersion: mkPrimop(misc.splitVersion, "splitVersion", 1), traceVerbose: mkPrimop(misc.traceVerbose, "traceVerbose", 2), tryEval: mkPrimop(misc.tryEval, "tryEval", 1), @@ -259,6 +259,6 @@ export const builtins: any = { langVersion: 6, nixPath: [], - nixVersion: "NIX_JS_VERSION", + nixVersion: "2.31.2", storeDir: "/nix/store", }; diff --git a/nix-js/runtime-ts/src/builtins/misc.ts b/nix-js/runtime-ts/src/builtins/misc.ts index fda2d44..75e4367 100644 --- a/nix-js/runtime-ts/src/builtins/misc.ts +++ b/nix-js/runtime-ts/src/builtins/misc.ts @@ -160,12 +160,6 @@ export const getFlake = (attrs: NixValue): never => { throw new Error("Not implemented: getFlake"); }; -export const match = - (regex: NixValue) => - (str: NixValue): never => { - throw new Error("Not implemented: match"); - }; - export const outputOf = (drv: NixValue) => (out: NixValue): never => { @@ -191,16 +185,77 @@ export const placeholder = (output: NixValue): never => { export const replaceStrings = (from: NixValue) => (to: NixValue) => - (s: NixValue): never => { - throw new Error("Not implemented: replaceStrings"); + (s: NixValue): NixValue => { + const fromList = forceList(from); + const toList = forceList(to); + const inputStr = forceString(s); + + if (fromList.length !== toList.length) { + throw new Error( + "'from' and 'to' arguments passed to builtins.replaceStrings have different lengths" + ); + } + + const toCache = new Map(); + + let result = ""; + let pos = 0; + + while (pos <= inputStr.length) { + let found = false; + + for (let i = 0; i < fromList.length; i++) { + const pattern = forceString(fromList[i]); + + if (inputStr.substring(pos).startsWith(pattern)) { + found = true; + + if (!toCache.has(i)) { + toCache.set(i, forceString(toList[i])); + } + const replacement = toCache.get(i)!; + + result += replacement; + + if (pattern.length === 0) { + if (pos < inputStr.length) { + result += inputStr[pos]; + } + pos++; + } else { + pos += pattern.length; + } + break; + } + } + + if (!found) { + if (pos < inputStr.length) { + result += inputStr[pos]; + } + pos++; + } + } + + return result; }; -export const split = (regex: NixValue, str: NixValue): never => { - throw new Error("Not implemented: split"); -}; -export const splitVersion = (s: NixValue): never => { - throw new Error("Not implemented: splitVersion"); +export const splitVersion = (s: NixValue): NixValue => { + const version = forceString(s); + const components: string[] = []; + let idx = 0; + + while (idx < version.length) { + const result = nextComponent(version, idx); + if (result.component === "") { + break; + } + components.push(result.component); + idx = result.nextIndex; + } + + return components; }; export const traceVerbose = (e1: NixValue, e2: NixValue): never => { diff --git a/nix-js/runtime-ts/src/builtins/string.ts b/nix-js/runtime-ts/src/builtins/string.ts index 75a5ed3..be6df82 100644 --- a/nix-js/runtime-ts/src/builtins/string.ts +++ b/nix-js/runtime-ts/src/builtins/string.ts @@ -42,3 +42,122 @@ export const baseNameOf = (x: NixValue): string => { return str.substring(pos, last + 1); }; + +const POSIX_CLASSES: Record = { + alnum: "a-zA-Z0-9", + alpha: "a-zA-Z", + blank: " \\t", + digit: "0-9", + lower: "a-z", + upper: "A-Z", + space: "\\s", + xdigit: "0-9A-Fa-f", + punct: "\\-!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~", +}; + +function posixToJsRegex(pattern: string, fullMatch: boolean = false): RegExp { + let jsPattern = pattern; + + jsPattern = jsPattern.replace(/\[(\^?)(?:\[:(\w+):\])+\]/g, (match) => { + const isNegated = match[1] === "^"; + const classNames = [...match.matchAll(/\[:(\w+):\]/g)].map((m) => m[1]); + + const combined = classNames + .map((className) => { + const replacement = POSIX_CLASSES[className]; + if (!replacement) { + throw new Error(`Unknown POSIX character class: ${className}`); + } + return replacement; + }) + .join(""); + + return isNegated ? `[^${combined}]` : `[${combined}]`; + }); + + jsPattern = jsPattern.replace(/\[:(\w+):\]/g, (_match, className) => { + const replacement = POSIX_CLASSES[className]; + if (!replacement) { + throw new Error(`Unknown POSIX character class: ${className}`); + } + return replacement; + }); + + if (fullMatch) { + if (!jsPattern.startsWith("^")) { + jsPattern = "^" + jsPattern; + } + if (!jsPattern.endsWith("$")) { + jsPattern = jsPattern + "$"; + } + } + + return new RegExp(jsPattern, "u"); +} + + +export const match = + (regex: NixValue) => + (str: NixValue): NixValue => { + const regexStr = forceString(regex); + const inputStr = forceString(str); + + try { + const re = posixToJsRegex(regexStr, true); + const result = inputStr.match(re); + + if (!result) { + return null; + } + + const groups: NixValue[] = []; + for (let i = 1; i < result.length; i++) { + groups.push(result[i] !== undefined ? result[i] : null); + } + + return groups; + } catch (e) { + throw new Error(`Invalid regular expression '${regexStr}': ${e}`); + } + }; + +export const split = + (regex: NixValue) => + (str: NixValue): NixValue => { + const regexStr = forceString(regex); + const inputStr = forceString(str); + + try { + const re = posixToJsRegex(regexStr); + const reGlobal = new RegExp(re.source, re.flags + "g"); + + const result: NixValue[] = []; + let lastIndex = 0; + let match: RegExpExecArray | null; + + while ((match = reGlobal.exec(inputStr)) !== null) { + result.push(inputStr.substring(lastIndex, match.index)); + + const groups: NixValue[] = []; + for (let i = 1; i < match.length; i++) { + groups.push(match[i] !== undefined ? match[i] : null); + } + result.push(groups); + + lastIndex = match.index + match[0].length; + + if (match[0].length === 0) { + reGlobal.lastIndex++; + } + } + + if (lastIndex === 0) { + return [inputStr]; + } + + result.push(inputStr.substring(lastIndex)); + return result; + } catch (e) { + throw new Error(`Invalid regular expression '${regexStr}': ${e}`); + } + }; diff --git a/nix-js/src/codegen.rs b/nix-js/src/codegen.rs index 27b1a9b..320ca75 100644 --- a/nix-js/src/codegen.rs +++ b/nix-js/src/codegen.rs @@ -78,7 +78,7 @@ impl Compile for Ir { &Ir::Assert(Assert { assertion, expr }) => { let assertion = ctx.get_ir(assertion).compile(ctx); let expr = ctx.get_ir(expr).compile(ctx); - format!("({assertion})?({expr}):(()=>{{throw \"assertion failed\"}})()") + format!("({assertion})?({expr}):(()=>{{throw new Error(\"assertion failed\")}})()") } } } diff --git a/nix-js/tests/regex.rs b/nix-js/tests/regex.rs new file mode 100644 index 0000000..1756114 --- /dev/null +++ b/nix-js/tests/regex.rs @@ -0,0 +1,316 @@ +mod utils; + +use nix_js::value::{List, Value}; +use utils::eval; + +#[test] +fn test_match_exact_full_string() { + assert_eq!( + eval(r#"builtins.match "foobar" "foobar""#), + Value::List(List::new(vec![])) + ); +} + +#[test] +fn test_match_partial_returns_null() { + assert_eq!(eval(r#"builtins.match "foo" "foobar""#), Value::Null); +} + +#[test] +fn test_match_with_capture_groups() { + assert_eq!( + eval(r#"builtins.match "(.*)\\.nix" "foobar.nix""#), + Value::List(List::new(vec![Value::String("foobar".into())])) + ); +} + +#[test] +fn test_match_multiple_capture_groups() { + assert_eq!( + eval(r#"builtins.match "((.*)/)?([^/]*)\\.nix" "foobar.nix""#), + Value::List(List::new(vec![ + Value::Null, + Value::Null, + Value::String("foobar".into()) + ])) + ); +} + +#[test] +fn test_match_with_path() { + assert_eq!( + eval(r#"builtins.match "((.*)/)?([^/]*)\\.nix" "/path/to/foobar.nix""#), + Value::List(List::new(vec![ + Value::String("/path/to/".into()), + Value::String("/path/to".into()), + Value::String("foobar".into()) + ])) + ); +} + +#[test] +fn test_match_posix_space_class() { + assert_eq!( + eval(r#"builtins.match "[[:space:]]+([^[:space:]]+)[[:space:]]+" " foo ""#), + Value::List(List::new(vec![Value::String("foo".into())])) + ); +} + +#[test] +fn test_match_posix_upper_class() { + assert_eq!(eval(r#"builtins.match "[[:space:]]+([[:upper:]]+)[[:space:]]+" " foo ""#), Value::Null); + + assert_eq!( + eval(r#"builtins.match "[[:space:]]+([[:upper:]]+)[[:space:]]+" " FOO ""#), + Value::List(List::new(vec![Value::String("FOO".into())])) + ); +} + +#[test] +fn test_match_quantifiers() { + assert_eq!( + eval(r#"builtins.match "fo*" "f""#), + Value::List(List::new(vec![])) + ); + + assert_eq!(eval(r#"builtins.match "fo+" "f""#), Value::Null); + + assert_eq!( + eval(r#"builtins.match "fo{1,2}" "foo""#), + Value::List(List::new(vec![])) + ); + + assert_eq!(eval(r#"builtins.match "fo{1,2}" "fooo""#), Value::Null); +} + +#[test] +fn test_split_non_capturing() { + assert_eq!( + eval(r#"builtins.split "foobar" "foobar""#), + Value::List(List::new(vec![ + Value::String("".into()), + Value::List(List::new(vec![])), + Value::String("".into()) + ])) + ); +} + +#[test] +fn test_split_no_match() { + assert_eq!( + eval(r#"builtins.split "fo+" "f""#), + Value::List(List::new(vec![Value::String("f".into())])) + ); +} + +#[test] +fn test_split_with_capture_group() { + assert_eq!( + eval(r#"builtins.split "(fo*)" "foobar""#), + Value::List(List::new(vec![ + Value::String("".into()), + Value::List(List::new(vec![Value::String("foo".into())])), + Value::String("bar".into()) + ])) + ); +} + +#[test] +fn test_split_multiple_matches() { + assert_eq!( + eval(r#"builtins.split "(b)" "foobarbaz""#), + Value::List(List::new(vec![ + Value::String("foo".into()), + Value::List(List::new(vec![Value::String("b".into())])), + Value::String("ar".into()), + Value::List(List::new(vec![Value::String("b".into())])), + Value::String("az".into()) + ])) + ); +} + +#[test] +fn test_split_with_multiple_groups() { + assert_eq!( + eval(r#"builtins.split "(f)(o*)" "foo""#), + Value::List(List::new(vec![ + Value::String("".into()), + Value::List(List::new(vec![ + Value::String("f".into()), + Value::String("oo".into()) + ])), + Value::String("".into()) + ])) + ); +} + +#[test] +fn test_split_with_optional_groups() { + assert_eq!( + eval(r#"builtins.split "(a)|(c)" "abc""#), + Value::List(List::new(vec![ + Value::String("".into()), + Value::List(List::new(vec![Value::String("a".into()), Value::Null])), + Value::String("b".into()), + Value::List(List::new(vec![Value::Null, Value::String("c".into())])), + Value::String("".into()) + ])) + ); +} + +#[test] +fn test_split_greedy_matching() { + assert_eq!( + eval(r#"builtins.split "(o+)" "oooofoooo""#), + Value::List(List::new(vec![ + Value::String("".into()), + Value::List(List::new(vec![Value::String("oooo".into())])), + Value::String("f".into()), + Value::List(List::new(vec![Value::String("oooo".into())])), + Value::String("".into()) + ])) + ); +} + +#[test] +fn test_split_posix_classes() { + assert_eq!( + eval(r#"builtins.split "([[:upper:]]+)" " FOO ""#), + Value::List(List::new(vec![ + Value::String(" ".into()), + Value::List(List::new(vec![Value::String("FOO".into())])), + Value::String(" ".into()) + ])) + ); +} + +#[test] +fn test_replace_basic() { + assert_eq!( + eval(r#"builtins.replaceStrings ["o"] ["a"] "foobar""#), + Value::String("faabar".into()) + ); +} + +#[test] +fn test_replace_with_empty() { + assert_eq!( + eval(r#"builtins.replaceStrings ["o"] [""] "foobar""#), + Value::String("fbar".into()) + ); +} + +#[test] +fn test_replace_multiple_patterns() { + assert_eq!( + eval(r#"builtins.replaceStrings ["oo" "a"] ["a" "oo"] "foobar""#), + Value::String("faboor".into()) + ); +} + +#[test] +fn test_replace_first_match_wins() { + assert_eq!( + eval(r#"builtins.replaceStrings ["oo" "oo"] ["u" "i"] "foobar""#), + Value::String("fubar".into()) + ); +} + +#[test] +fn test_replace_empty_pattern() { + assert_eq!( + eval(r#"builtins.replaceStrings [""] ["X"] "abc""#), + Value::String("XaXbXcX".into()) + ); +} + +#[test] +fn test_replace_empty_pattern_empty_string() { + assert_eq!( + eval(r#"builtins.replaceStrings [""] ["X"] """#), + Value::String("X".into()) + ); +} + +#[test] +fn test_replace_simple_char() { + assert_eq!( + eval(r#"builtins.replaceStrings ["-"] ["_"] "a-b""#), + Value::String("a_b".into()) + ); +} + +#[test] +fn test_replace_longer_pattern() { + assert_eq!( + eval(r#"builtins.replaceStrings ["oo"] ["u"] "foobar""#), + Value::String("fubar".into()) + ); +} + +#[test] +fn test_replace_different_lengths() { + let result = std::panic::catch_unwind(|| { + eval(r#"builtins.replaceStrings ["a" "b"] ["x"] "test""#) + }); + assert!(result.is_err()); +} + +#[test] +fn test_split_version_simple() { + assert_eq!( + eval(r#"builtins.splitVersion "1.2.3""#), + Value::List(List::new(vec![ + Value::String("1".into()), + Value::String("2".into()), + Value::String("3".into()) + ])) + ); +} + +#[test] +fn test_split_version_with_pre() { + assert_eq!( + eval(r#"builtins.splitVersion "2.3.0pre1234""#), + Value::List(List::new(vec![ + Value::String("2".into()), + Value::String("3".into()), + Value::String("0".into()), + Value::String("pre".into()), + Value::String("1234".into()) + ])) + ); +} + +#[test] +fn test_split_version_with_letters() { + assert_eq!( + eval(r#"builtins.splitVersion "2.3a""#), + Value::List(List::new(vec![ + Value::String("2".into()), + Value::String("3".into()), + Value::String("a".into()) + ])) + ); +} + +#[test] +fn test_split_version_with_dashes() { + assert_eq!( + eval(r#"builtins.splitVersion "2.3-beta1""#), + Value::List(List::new(vec![ + Value::String("2".into()), + Value::String("3".into()), + Value::String("beta".into()), + Value::String("1".into()) + ])) + ); +} + +#[test] +fn test_split_version_empty() { + assert_eq!( + eval(r#"builtins.splitVersion """#), + Value::List(List::new(vec![])) + ); +}