feat: regex related builtins
This commit is contained in:
@@ -175,6 +175,8 @@ export const builtins: any = {
|
||||
substring: mkPrimop(string.substring, "substring", 3),
|
||||
concatStringsSep: mkPrimop(string.concatStringsSep, "concatStringsSep", 2),
|
||||
baseNameOf: mkPrimop(string.baseNameOf, "baseNameOf", 1),
|
||||
match: mkPrimop(string.match, "match", 2),
|
||||
split: mkPrimop(string.split, "split", 2),
|
||||
|
||||
seq: mkPrimop(functional.seq, "seq", 2),
|
||||
deepSeq: mkPrimop(functional.deepSeq, "deepSeq", 2),
|
||||
@@ -234,14 +236,12 @@ export const builtins: any = {
|
||||
functionArgs: mkPrimop(misc.functionArgs, "functionArgs", 1),
|
||||
genericClosure: mkPrimop(misc.genericClosure, "genericClosure", 1),
|
||||
getFlake: mkPrimop(misc.getFlake, "getFlake", 1),
|
||||
match: mkPrimop(misc.match, "match", 2),
|
||||
outputOf: mkPrimop(misc.outputOf, "outputOf", 2),
|
||||
parseDrvName: mkPrimop(misc.parseDrvName, "parseDrvName", 1),
|
||||
parseFlakeName: mkPrimop(misc.parseFlakeName, "parseFlakeName", 1),
|
||||
parseFlakeRef: mkPrimop(misc.parseFlakeRef, "parseFlakeRef", 1),
|
||||
placeholder: mkPrimop(misc.placeholder, "placeholder", 1),
|
||||
replaceStrings: mkPrimop(misc.replaceStrings, "replaceStrings", 3),
|
||||
split: mkPrimop(misc.split, "split", 2),
|
||||
splitVersion: mkPrimop(misc.splitVersion, "splitVersion", 1),
|
||||
traceVerbose: mkPrimop(misc.traceVerbose, "traceVerbose", 2),
|
||||
tryEval: mkPrimop(misc.tryEval, "tryEval", 1),
|
||||
@@ -259,6 +259,6 @@ export const builtins: any = {
|
||||
|
||||
langVersion: 6,
|
||||
nixPath: [],
|
||||
nixVersion: "NIX_JS_VERSION",
|
||||
nixVersion: "2.31.2",
|
||||
storeDir: "/nix/store",
|
||||
};
|
||||
|
||||
@@ -160,12 +160,6 @@ export const getFlake = (attrs: NixValue): never => {
|
||||
throw new Error("Not implemented: getFlake");
|
||||
};
|
||||
|
||||
/**
 * Placeholder for the `match` builtin in this module.
 *
 * @param regex - Accepted but never inspected.
 * @param str - Accepted but never inspected.
 * @throws Error Always, with the message "Not implemented: match".
 */
export const match = (regex: NixValue) => {
  return (str: NixValue): never => {
    throw new Error("Not implemented: match");
  };
};
|
||||
|
||||
export const outputOf =
|
||||
(drv: NixValue) =>
|
||||
(out: NixValue): never => {
|
||||
@@ -191,16 +185,77 @@ export const placeholder = (output: NixValue): never => {
|
||||
/**
 * Implements `builtins.replaceStrings from to s`.
 *
 * Scans `s` left to right. At each position the first entry of `from` that is
 * a prefix of the remaining input wins and its counterpart in `to` is appended
 * to the output. An empty pattern matches everywhere (including at the very
 * end of the input): its replacement is emitted and one input character is
 * then copied through unchanged.
 *
 * @param from - List of strings to search for.
 * @param to - List of replacement strings; must have the same length as `from`.
 * @param s - The input string.
 * @returns The input with every match replaced.
 * @throws Error If `from` and `to` have different lengths.
 */
export const replaceStrings =
  (from: NixValue) =>
  (to: NixValue) =>
  (s: NixValue): NixValue => {
    const fromList = forceList(from);
    const toList = forceList(to);
    const inputStr = forceString(s);

    if (fromList.length !== toList.length) {
      throw new Error(
        "'from' and 'to' arguments passed to builtins.replaceStrings have different lengths"
      );
    }

    // Force every search pattern exactly once instead of once per input
    // position; reference Nix likewise forces all of `from` eagerly.
    const patterns: string[] = fromList.map((pattern) => forceString(pattern));

    // Replacements are forced lazily, only for patterns that actually match.
    const toCache = new Map<number, string>();

    let result = "";
    let pos = 0;

    // `<=` (not `<`) so that an empty pattern can still match at end of input.
    while (pos <= inputStr.length) {
      let matchedIndex = -1;
      let matchedLength = 0;

      for (const [i, pattern] of patterns.entries()) {
        // Positional startsWith avoids allocating a fresh suffix per check.
        if (inputStr.startsWith(pattern, pos)) {
          matchedIndex = i;
          matchedLength = pattern.length;
          break;
        }
      }

      if (matchedIndex !== -1) {
        let replacement = toCache.get(matchedIndex);
        if (replacement === undefined) {
          replacement = forceString(toList[matchedIndex]);
          toCache.set(matchedIndex, replacement);
        }
        result += replacement;

        if (matchedLength > 0) {
          pos += matchedLength;
          continue;
        }
      }

      // Either nothing matched or an empty pattern matched: copy one input
      // character through (if any is left) and advance.
      if (pos < inputStr.length) {
        result += inputStr[pos];
      }
      pos++;
    }

    return result;
  };
|
||||
|
||||
/**
 * Placeholder for the `split` builtin in this module.
 *
 * @param regex - Accepted but never inspected.
 * @param str - Accepted but never inspected.
 * @throws Error Always, with the message "Not implemented: split".
 */
export const split = (regex: NixValue, str: NixValue): never => {
  const failure = new Error("Not implemented: split");
  throw failure;
};
|
||||
|
||||
/**
 * Implements `builtins.splitVersion s`: breaks a version string into its
 * components, in order.
 *
 * Components are produced by repeatedly calling `nextComponent` (defined
 * elsewhere in this file) starting at index 0. Scanning stops at the end of
 * the string or as soon as `nextComponent` yields an empty component.
 *
 * @param s - The version string.
 * @returns The list of component strings; empty for an empty input.
 */
export const splitVersion = (s: NixValue): NixValue => {
  const version = forceString(s);
  const components: string[] = [];

  let idx = 0;
  while (idx < version.length) {
    const { component, nextIndex } = nextComponent(version, idx);
    if (component === "") {
      break;
    }
    components.push(component);
    idx = nextIndex;
  }

  return components;
};
|
||||
|
||||
export const traceVerbose = (e1: NixValue, e2: NixValue): never => {
|
||||
|
||||
@@ -42,3 +42,122 @@ export const baseNameOf = (x: NixValue): string => {
|
||||
|
||||
return str.substring(pos, last + 1);
|
||||
};
|
||||
|
||||
/**
 * ASCII expansions of the POSIX character classes (`[:name:]`).
 *
 * Each value is written so that it can be spliced verbatim inside a
 * JavaScript `[...]` character class compiled with the `u` flag.
 */
const POSIX_CLASSES: Record<string, string> = {
  alnum: "a-zA-Z0-9",
  alpha: "a-zA-Z",
  blank: " \\t",
  cntrl: "\\x00-\\x1f\\x7f",
  digit: "0-9",
  graph: "\\x21-\\x7e",
  lower: "a-z",
  print: "\\x20-\\x7e",
  punct: "\\-!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~",
  space: "\\s",
  upper: "A-Z",
  xdigit: "0-9A-Fa-f",
};

/**
 * Looks up the expansion for one POSIX class name.
 *
 * Uses an own-property check so that names such as `constructor` are not
 * resolved through `Object.prototype`.
 *
 * @throws Error If `className` is not a known POSIX class.
 */
function expandPosixClass(className: string): string {
  const expansion = Object.prototype.hasOwnProperty.call(POSIX_CLASSES, className)
    ? POSIX_CLASSES[className]
    : undefined;
  if (expansion === undefined) {
    throw new Error(`Unknown POSIX character class: ${className}`);
  }
  return expansion;
}

/**
 * Translates a POSIX-flavoured regular expression into a JavaScript `RegExp`.
 *
 * Only POSIX character classes are rewritten; the rest of the pattern is
 * passed through unchanged and compiled with the `u` flag.
 *
 * @param pattern - The source pattern, possibly containing `[:class:]` atoms.
 * @param fullMatch - When true, the result only matches an entire input
 *   string. The pattern is wrapped as `^(?:...)$` so the anchors apply to the
 *   whole expression (including every branch of a top-level alternation) and
 *   capture-group numbering is left untouched.
 * @returns The compiled regular expression.
 * @throws Error If an unknown POSIX class is referenced.
 * @throws SyntaxError If the translated pattern is not a valid JavaScript regex.
 */
function posixToJsRegex(pattern: string, fullMatch: boolean = false): RegExp {
  // Bracket expressions made up solely of POSIX classes, optionally negated:
  // `[[:alpha:][:digit:]]`, `[^[:space:]]`.
  let jsPattern = pattern.replace(
    /\[(\^?)((?:\[:\w+:\])+)\]/g,
    (_whole, negation: string, classes: string) => {
      const combined = classes.replace(/\[:(\w+):\]/g, (_cls, className: string) =>
        expandPosixClass(className)
      );
      return `[${negation}${combined}]`;
    }
  );

  // Remaining class atoms sit inside a mixed bracket expression such as
  // `[[:alpha:]_]`; expanding them in place yields `[a-zA-Z_]`.
  jsPattern = jsPattern.replace(/\[:(\w+):\]/g, (_whole, className: string) =>
    expandPosixClass(className)
  );

  if (fullMatch) {
    jsPattern = `^(?:${jsPattern})$`;
  }

  return new RegExp(jsPattern, "u");
}
|
||||
|
||||
|
||||
/**
 * Implements `builtins.match regex str`.
 *
 * The pattern must match the whole of `str`. On success the capture groups
 * are returned in order, with `null` standing in for groups that did not
 * participate; on failure the result is `null`.
 *
 * @param regex - The pattern.
 * @param str - The string to test.
 * @returns A list of capture groups, or `null` when `str` does not match.
 * @throws Error If the pattern cannot be translated or compiled.
 */
export const match =
  (regex: NixValue) =>
  (str: NixValue): NixValue => {
    const regexStr = forceString(regex);
    const inputStr = forceString(str);

    try {
      const found = posixToJsRegex(regexStr, true).exec(inputStr);
      if (found === null) {
        return null;
      }

      // Index 0 is the whole match; only the capture groups are returned.
      const captures: NixValue[] = found.slice(1).map((group) => group ?? null);
      return captures;
    } catch (e) {
      throw new Error(`Invalid regular expression '${regexStr}': ${e}`);
    }
  };
|
||||
|
||||
/**
 * Implements `builtins.split regex str`.
 *
 * Returns a list that alternates between the non-matching pieces of `str`
 * and, for every match, a list of that match's capture groups (`null` for
 * groups that did not participate). The list always starts and ends with a
 * string piece, so a string with no match yields `[str]`.
 *
 * A match is recorded even when it is empty and sits at offset 0 (for
 * example the patterns `""` or `"^"`); such a match must not be mistaken for
 * "no match at all".
 *
 * @param regex - The pattern.
 * @param str - The string to split.
 * @returns The alternating list of string pieces and capture-group lists.
 * @throws Error If the pattern cannot be translated or compiled.
 */
export const split =
  (regex: NixValue) =>
  (str: NixValue): NixValue => {
    const regexStr = forceString(regex);
    const inputStr = forceString(str);

    try {
      const re = posixToJsRegex(regexStr);
      const reGlobal = new RegExp(re.source, re.flags + "g");

      const result: NixValue[] = [];
      let lastIndex = 0;
      let found: RegExpExecArray | null;

      while ((found = reGlobal.exec(inputStr)) !== null) {
        // Text between the previous match and this one.
        result.push(inputStr.substring(lastIndex, found.index));

        const groups: NixValue[] = found.slice(1).map((group) => group ?? null);
        result.push(groups);

        lastIndex = found.index + found[0].length;

        // An empty match does not advance the regex cursor by itself; step
        // past it so the loop cannot spin on the same position.
        if (found[0].length === 0) {
          reGlobal.lastIndex++;
        }
      }

      // Trailing piece. With no matches at all this is the whole input, so
      // the result is `[inputStr]` without needing a special case.
      result.push(inputStr.substring(lastIndex));
      return result;
    } catch (e) {
      throw new Error(`Invalid regular expression '${regexStr}': ${e}`);
    }
  };
|
||||
|
||||
@@ -78,7 +78,7 @@ impl<Ctx: CodegenContext> Compile<Ctx> for Ir {
|
||||
&Ir::Assert(Assert { assertion, expr }) => {
|
||||
let assertion = ctx.get_ir(assertion).compile(ctx);
|
||||
let expr = ctx.get_ir(expr).compile(ctx);
|
||||
format!("({assertion})?({expr}):(()=>{{throw \"assertion failed\"}})()")
|
||||
format!("({assertion})?({expr}):(()=>{{throw new Error(\"assertion failed\")}})()")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
316
nix-js/tests/regex.rs
Normal file
316
nix-js/tests/regex.rs
Normal file
@@ -0,0 +1,316 @@
|
||||
mod utils;
|
||||
|
||||
use nix_js::value::{List, Value};
|
||||
use utils::eval;
|
||||
|
||||
/// A pattern without groups that equals the whole input yields an empty list.
#[test]
fn test_match_exact_full_string() {
    let actual = eval(r#"builtins.match "foobar" "foobar""#);
    let expected = Value::List(List::new(vec![]));
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// A pattern that only covers a prefix of the input yields null.
#[test]
fn test_match_partial_returns_null() {
    let actual = eval(r#"builtins.match "foo" "foobar""#);
    assert_eq!(actual, Value::Null);
}
|
||||
|
||||
/// A single capture group is returned as a one-element list.
#[test]
fn test_match_with_capture_groups() {
    let actual = eval(r#"builtins.match "(.*)\\.nix" "foobar.nix""#);
    let expected = Value::List(List::new(vec![Value::String("foobar".into())]));
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// Groups that do not participate in the match come back as null.
#[test]
fn test_match_multiple_capture_groups() {
    let actual = eval(r#"builtins.match "((.*)/)?([^/]*)\\.nix" "foobar.nix""#);
    let groups = vec![
        Value::Null,
        Value::Null,
        Value::String("foobar".into()),
    ];
    assert_eq!(actual, Value::List(List::new(groups)));
}
|
||||
|
||||
/// With a directory prefix present, all three groups are populated.
#[test]
fn test_match_with_path() {
    let actual = eval(r#"builtins.match "((.*)/)?([^/]*)\\.nix" "/path/to/foobar.nix""#);
    let groups = vec![
        Value::String("/path/to/".into()),
        Value::String("/path/to".into()),
        Value::String("foobar".into()),
    ];
    assert_eq!(actual, Value::List(List::new(groups)));
}
|
||||
|
||||
/// `[[:space:]]` and its negation work inside bracket expressions.
#[test]
fn test_match_posix_space_class() {
    let actual = eval(r#"builtins.match "[[:space:]]+([^[:space:]]+)[[:space:]]+" " foo ""#);
    let expected = Value::List(List::new(vec![Value::String("foo".into())]));
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// `[[:upper:]]` rejects lowercase input and captures uppercase input.
#[test]
fn test_match_posix_upper_class() {
    let lowercase = eval(r#"builtins.match "[[:space:]]+([[:upper:]]+)[[:space:]]+" " foo ""#);
    assert_eq!(lowercase, Value::Null);

    let uppercase = eval(r#"builtins.match "[[:space:]]+([[:upper:]]+)[[:space:]]+" " FOO ""#);
    let expected = Value::List(List::new(vec![Value::String("FOO".into())]));
    assert_eq!(uppercase, expected);
}
|
||||
|
||||
/// Exercises the `*`, `+` and `{m,n}` quantifiers under full-string matching.
#[test]
fn test_match_quantifiers() {
    let no_groups = || Value::List(List::new(vec![]));

    let star = eval(r#"builtins.match "fo*" "f""#);
    assert_eq!(star, no_groups());

    let plus = eval(r#"builtins.match "fo+" "f""#);
    assert_eq!(plus, Value::Null);

    let bounded_hit = eval(r#"builtins.match "fo{1,2}" "foo""#);
    assert_eq!(bounded_hit, no_groups());

    let bounded_miss = eval(r#"builtins.match "fo{1,2}" "fooo""#);
    assert_eq!(bounded_miss, Value::Null);
}
|
||||
|
||||
/// A group-less pattern matching the whole input gives "", [], "".
#[test]
fn test_split_non_capturing() {
    let actual = eval(r#"builtins.split "foobar" "foobar""#);
    let pieces = vec![
        Value::String("".into()),
        Value::List(List::new(vec![])),
        Value::String("".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
|
||||
|
||||
/// Without any match the result is a one-element list holding the input.
#[test]
fn test_split_no_match() {
    let actual = eval(r#"builtins.split "fo+" "f""#);
    let expected = Value::List(List::new(vec![Value::String("f".into())]));
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// The captured text appears as a nested list between the string pieces.
#[test]
fn test_split_with_capture_group() {
    let actual = eval(r#"builtins.split "(fo*)" "foobar""#);
    let pieces = vec![
        Value::String("".into()),
        Value::List(List::new(vec![Value::String("foo".into())])),
        Value::String("bar".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
|
||||
|
||||
/// Every occurrence of the pattern contributes its own group list.
#[test]
fn test_split_multiple_matches() {
    let actual = eval(r#"builtins.split "(b)" "foobarbaz""#);
    let pieces = vec![
        Value::String("foo".into()),
        Value::List(List::new(vec![Value::String("b".into())])),
        Value::String("ar".into()),
        Value::List(List::new(vec![Value::String("b".into())])),
        Value::String("az".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
|
||||
|
||||
/// Several groups in one match are reported together, in order.
#[test]
fn test_split_with_multiple_groups() {
    let actual = eval(r#"builtins.split "(f)(o*)" "foo""#);
    let captured = vec![
        Value::String("f".into()),
        Value::String("oo".into()),
    ];
    let pieces = vec![
        Value::String("".into()),
        Value::List(List::new(captured)),
        Value::String("".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
|
||||
|
||||
/// In an alternation, the branch that did not match reports null.
#[test]
fn test_split_with_optional_groups() {
    let actual = eval(r#"builtins.split "(a)|(c)" "abc""#);
    let first_match = vec![Value::String("a".into()), Value::Null];
    let second_match = vec![Value::Null, Value::String("c".into())];
    let pieces = vec![
        Value::String("".into()),
        Value::List(List::new(first_match)),
        Value::String("b".into()),
        Value::List(List::new(second_match)),
        Value::String("".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
|
||||
|
||||
/// `+` consumes each whole run of matching characters in one match.
#[test]
fn test_split_greedy_matching() {
    let actual = eval(r#"builtins.split "(o+)" "oooofoooo""#);
    let pieces = vec![
        Value::String("".into()),
        Value::List(List::new(vec![Value::String("oooo".into())])),
        Value::String("f".into()),
        Value::List(List::new(vec![Value::String("oooo".into())])),
        Value::String("".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
|
||||
|
||||
/// POSIX classes are honoured by `split` as well.
#[test]
fn test_split_posix_classes() {
    let actual = eval(r#"builtins.split "([[:upper:]]+)" " FOO ""#);
    let pieces = vec![
        Value::String(" ".into()),
        Value::List(List::new(vec![Value::String("FOO".into())])),
        Value::String(" ".into()),
    ];
    assert_eq!(actual, Value::List(List::new(pieces)));
}
|
||||
|
||||
/// Every occurrence of a single-character pattern is replaced.
#[test]
fn test_replace_basic() {
    let actual = eval(r#"builtins.replaceStrings ["o"] ["a"] "foobar""#);
    let expected = Value::String("faabar".into());
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// Replacing with the empty string deletes the matched text.
#[test]
fn test_replace_with_empty() {
    let actual = eval(r#"builtins.replaceStrings ["o"] [""] "foobar""#);
    let expected = Value::String("fbar".into());
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// Replacement output is not rescanned by the other patterns.
#[test]
fn test_replace_multiple_patterns() {
    let actual = eval(r#"builtins.replaceStrings ["oo" "a"] ["a" "oo"] "foobar""#);
    let expected = Value::String("faboor".into());
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// When two identical patterns are given, the earlier one is used.
#[test]
fn test_replace_first_match_wins() {
    let actual = eval(r#"builtins.replaceStrings ["oo" "oo"] ["u" "i"] "foobar""#);
    let expected = Value::String("fubar".into());
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// An empty pattern inserts the replacement around every character.
#[test]
fn test_replace_empty_pattern() {
    let actual = eval(r#"builtins.replaceStrings [""] ["X"] "abc""#);
    let expected = Value::String("XaXbXcX".into());
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// An empty pattern on an empty input still produces one replacement.
#[test]
fn test_replace_empty_pattern_empty_string() {
    let actual = eval(r#"builtins.replaceStrings [""] ["X"] """#);
    let expected = Value::String("X".into());
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// A lone `-` pattern is replaced like any other character.
#[test]
fn test_replace_simple_char() {
    let actual = eval(r#"builtins.replaceStrings ["-"] ["_"] "a-b""#);
    let expected = Value::String("a_b".into());
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// A multi-character pattern is replaced as a unit.
#[test]
fn test_replace_longer_pattern() {
    let actual = eval(r#"builtins.replaceStrings ["oo"] ["u"] "foobar""#);
    let expected = Value::String("fubar".into());
    assert_eq!(actual, expected);
}
|
||||
|
||||
/// Evaluation panics when `from` and `to` have different lengths.
#[test]
fn test_replace_different_lengths() {
    let outcome =
        std::panic::catch_unwind(|| eval(r#"builtins.replaceStrings ["a" "b"] ["x"] "test""#));
    assert!(outcome.is_err());
}
|
||||
|
||||
/// Dots separate the components of a plain numeric version.
#[test]
fn test_split_version_simple() {
    let actual = eval(r#"builtins.splitVersion "1.2.3""#);
    let components = vec![
        Value::String("1".into()),
        Value::String("2".into()),
        Value::String("3".into()),
    ];
    assert_eq!(actual, Value::List(List::new(components)));
}
|
||||
|
||||
/// A digit/letter boundary starts a new component even without a separator.
#[test]
fn test_split_version_with_pre() {
    let actual = eval(r#"builtins.splitVersion "2.3.0pre1234""#);
    let components = vec![
        Value::String("2".into()),
        Value::String("3".into()),
        Value::String("0".into()),
        Value::String("pre".into()),
        Value::String("1234".into()),
    ];
    assert_eq!(actual, Value::List(List::new(components)));
}
|
||||
|
||||
/// A trailing letter becomes its own component.
#[test]
fn test_split_version_with_letters() {
    let actual = eval(r#"builtins.splitVersion "2.3a""#);
    let components = vec![
        Value::String("2".into()),
        Value::String("3".into()),
        Value::String("a".into()),
    ];
    assert_eq!(actual, Value::List(List::new(components)));
}
|
||||
|
||||
/// A dash separates components just as a dot does.
#[test]
fn test_split_version_with_dashes() {
    let actual = eval(r#"builtins.splitVersion "2.3-beta1""#);
    let components = vec![
        Value::String("2".into()),
        Value::String("3".into()),
        Value::String("beta".into()),
        Value::String("1".into()),
    ];
    assert_eq!(actual, Value::List(List::new(components)));
}
|
||||
|
||||
/// An empty version string has no components.
#[test]
fn test_split_version_empty() {
    let actual = eval(r#"builtins.splitVersion """#);
    let expected = Value::List(List::new(vec![]));
    assert_eq!(actual, expected);
}
|
||||
Reference in New Issue
Block a user