Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
449 changes: 448 additions & 1 deletion Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ ninja_env = { path = "ninja_env" }
shell-quote = { version = "0.7.2", default-features = false, features = ["sh"] }
shlex = "1.3.0"
time = { version = "0.3.44", features = ["formatting", "macros", "parsing", "serde"] }
ureq = { version = "2.10.5" }

[build-dependencies]
clap = { version = "4.5.0", features = ["derive"] }
Expand Down
12 changes: 12 additions & 0 deletions docs/netsuke-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -922,6 +922,18 @@ Using `shell()` marks the template as *impure* and disables caching of the
rendered YAML between Stage 2 and Stage 3. This avoids accidental reuse of
results that depend on external commands.

Implementation details:

- `fetch` issues HTTP requests through the `ureq` client. When caching is
enabled, a SHA-256 digest of the URL becomes the cache key and responses are
written beneath `.netsuke/fetch` (or a user-provided directory) using
capability-restricted file handles.
- `shell` and `grep` spawn the platform shell (`sh` or `cmd.exe`) with POSIX
single-quoted arguments emitted via `shell-quote`. The stdlib registers a
shared `StdlibState` that flips an `impure` flag whenever these helpers
execute so callers can detect templates that interacted with the outside
world.

Custom external commands can be registered as additional filters. Those should
be marked `pure` if safe for caching or `impure` otherwise.

Expand Down
4 changes: 2 additions & 2 deletions docs/roadmap.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ library, and CLI ergonomics.
- [x] Refactor all error-producing code to provide the clear, contextual, and
actionable error messages specified in the design document.

- [ ] **Template Standard Library:**
- [x] **Template Standard Library:**

- [x] Implement the basic file-system tests (`dir`, `file`, `symlink`,
`pipe`, `block_device`, `char_device`, legacy `device`). *(done)*
Expand All @@ -165,7 +165,7 @@ library, and CLI ergonomics.
- [x] Implement the generic collection filters (`uniq`, `flatten`,
`group_by`). *(done)*

- [ ] Implement the network and command functions/filters (fetch, shell,
- [x] Implement the network and command functions/filters (fetch, shell,
grep), ensuring shell marks templates as impure to disable caching.

- [x] Implement the time helpers (`now`, `timedelta`).
Expand Down
2 changes: 1 addition & 1 deletion src/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ fn from_str_named(yaml: &str, name: &str) -> Result<NetsukeManifest> {
// Expose custom helpers to templates.
jinja.add_function("env", |name: String| env_var(&name));
jinja.add_function("glob", |pattern: String| glob_paths(&pattern));
crate::stdlib::register(&mut jinja);
let _stdlib_state = crate::stdlib::register(&mut jinja);

if let Some(vars) = doc.get("vars").and_then(|v| v.as_mapping()).cloned() {
for (k, v) in vars {
Expand Down
219 changes: 219 additions & 0 deletions src/stdlib/command.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
//! Shell-oriented helpers for the `MiniJinja` standard library.
//!
//! The helpers bridge template values into the local shell while keeping
//! behaviour predictable across platforms. All helpers mark the stdlib state as
//! impure so the caller can invalidate any caching layer that depends on pure
//! template evaluation.

use std::{
io::{self, Write},
process::{Command, Stdio},
sync::{
Arc,
atomic::{AtomicBool, Ordering},
},
};

use minijinja::{
Error, ErrorKind, State,
value::{Value, ValueKind},
};
use shell_quote::{QuoteRefExt, Sh};

#[cfg(windows)]
const SHELL: &str = "cmd";
#[cfg(windows)]
const SHELL_ARGS: &[&str] = &["/C"];

#[cfg(not(windows))]
const SHELL: &str = "sh";
#[cfg(not(windows))]
const SHELL_ARGS: &[&str] = &["-c"];

/// Registers the `shell` and `grep` filters on the given environment.
///
/// Each filter sets the shared `impure` flag to `true` every time it is
/// invoked, before doing any work, so callers holding the same flag can tell
/// that template evaluation reached out to an external command.
pub(crate) fn register(env: &mut minijinja::Environment<'_>, impure: Arc<AtomicBool>) {
    // One handle per closure; both point at the same underlying flag.
    let grep_marker = Arc::clone(&impure);
    let shell_marker = impure;

    env.add_filter(
        "shell",
        move |state: &State, value: Value, command: String| {
            shell_marker.store(true, Ordering::Relaxed);
            execute_shell(state, &value, &command)
        },
    );

    env.add_filter(
        "grep",
        move |state: &State, value: Value, pattern: String, flags: Option<Value>| {
            grep_marker.store(true, Ordering::Relaxed);
            execute_grep(state, &value, &pattern, flags)
        },
    );
}
Comment on lines +56 to +74
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Add doc comment for register function.

The coding guidelines require that public APIs (including pub(crate) functions) have doc comments so cargo doc can generate documentation. Document the purpose of this function, the impure flag's role in marking templates impure, and the filters it registers.

Add this doc comment:

+/// Registers shell-oriented filters in the MiniJinja environment.
+///
+/// The `shell` filter executes arbitrary shell commands and returns their
+/// stdout. The `grep` filter searches input text using the `grep` utility.
+/// Both filters mark the provided `impure` flag to signal that template
+/// evaluation has side effects and should not be cached.
+///
+/// # Security
+///
+/// Only use these filters with trusted templates. See module-level docs.
 pub(crate) fn register(env: &mut minijinja::Environment<'_>, impure: Arc<AtomicBool>) {
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
pub(crate) fn register(env: &mut minijinja::Environment<'_>, impure: Arc<AtomicBool>) {
let shell_flag = Arc::clone(&impure);
env.add_filter(
"shell",
move |state: &State, value: Value, command: String| {
shell_flag.store(true, Ordering::Relaxed);
execute_shell(state, &value, &command)
},
);
let grep_flag = impure;
env.add_filter(
"grep",
move |state: &State, value: Value, pattern: String, flags: Option<Value>| {
grep_flag.store(true, Ordering::Relaxed);
execute_grep(state, &value, &pattern, flags)
},
);
}
/// Registers shell-oriented filters in the MiniJinja environment.
///
/// The `shell` filter executes arbitrary shell commands and returns their
/// stdout. The `grep` filter searches input text using the `grep` utility.
/// Both filters mark the provided `impure` flag to signal that template
/// evaluation has side effects and should not be cached.
///
/// # Security
///
/// Only use these filters with trusted templates. See module-level docs.
pub(crate) fn register(env: &mut minijinja::Environment<'_>, impure: Arc<AtomicBool>) {
let shell_flag = Arc::clone(&impure);
env.add_filter(
"shell",
move |state: &State, value: Value, command: String| {
shell_flag.store(true, Ordering::Relaxed);
execute_shell(state, &value, &command)
},
);
let grep_flag = impure;
env.add_filter(
"grep",
move |state: &State, value: Value, pattern: String, flags: Option<Value>| {
grep_flag.store(true, Ordering::Relaxed);
execute_grep(state, &value, &pattern, flags)
},
);
}
🤖 Prompt for AI Agents
In src/stdlib/command.rs around lines 56 to 74, the register function lacks a
doc comment; add a triple-slash (///) Rust doc comment immediately above
pub(crate) fn register explaining that this function registers the "shell" and
"grep" Minijinja filters on the provided Environment, describing the role of the
impure Arc<AtomicBool> parameter (it is set true when either filter is used to
mark templates as impure), and briefly summarizing behavior of each registered
filter (shell executes shell commands, grep filters values by pattern and
optional flags); keep the comment concise and idiomatic for cargo doc
generation.


/// Implements the `shell` filter.
///
/// The trimmed `command` is run through the platform shell with the filtered
/// `value` supplied on stdin; the command's stdout becomes the result.
///
/// # Errors
///
/// Returns an `InvalidOperation` error when the command is blank, when the
/// value cannot be turned into bytes, or when running the command fails.
fn execute_shell(state: &State, value: &Value, command: &str) -> Result<Value, Error> {
    let script = command.trim();
    if script.is_empty() {
        return Err(Error::new(
            ErrorKind::InvalidOperation,
            "shell filter requires a non-empty command",
        ));
    }

    let payload = to_bytes(value)?;
    match run_command(script, &payload) {
        Ok(stdout) => Ok(value_from_bytes(stdout)),
        Err(failure) => Err(command_error(failure, state.name(), script)),
    }
}

fn execute_grep(
state: &State,
value: &Value,
pattern: &str,
flags: Option<Value>,
) -> Result<Value, Error> {
if pattern.is_empty() {
return Err(Error::new(
ErrorKind::InvalidOperation,
"grep filter requires a search pattern",
));
}

let mut args = collect_flag_args(flags)?;
args.push(pattern.to_owned());
let command = format_command("grep", &args);
let input = to_bytes(value)?;
let output =
run_command(&command, &input).map_err(|err| command_error(err, state.name(), &command))?;
Ok(value_from_bytes(output))
}

fn collect_flag_args(flags: Option<Value>) -> Result<Vec<String>, Error> {
let Some(value) = flags else {
return Ok(Vec::new());
};
match value.kind() {
ValueKind::Undefined => Ok(Vec::new()),
ValueKind::Seq | ValueKind::Iterable => value
.try_iter()?
.map(|item| {
item.as_str().map_or_else(
|| {
Err(Error::new(
ErrorKind::InvalidOperation,
"grep flags must be strings",
))
},
|s| Ok(s.to_owned()),
)
})
.collect(),
_ => value
.as_str()
.map(|s| vec![s.to_owned()])
.ok_or_else(|| Error::new(ErrorKind::InvalidOperation, "grep flags must be strings")),
}
}

/// Builds a shell command line from `base` followed by each of `args`.
///
/// Arguments are separated by single spaces and individually passed through
/// [`quote`]; `base` is emitted verbatim.
fn format_command(base: &str, args: &[String]) -> String {
    args.iter().fold(String::from(base), |mut line, arg| {
        line.push(' ');
        line.push_str(&quote(arg));
        line
    })
}

/// Quotes a single argument for the shell using `shell-quote`'s `Sh` rules.
///
/// # Panics
///
/// Panics if the quoted bytes are not valid UTF-8.
fn quote(arg: &str) -> String {
    let quoted: Vec<u8> = arg.quoted(Sh);
    String::from_utf8(quoted).expect("quoted args are valid UTF-8")
}

/// Converts a template value into the bytes sent to a command's stdin.
///
/// When the value exposes a byte view through `as_bytes`, those bytes are
/// copied as-is; otherwise the value's string rendering is used.
///
/// # Errors
///
/// Returns an `InvalidOperation` error for undefined values.
fn to_bytes(value: &Value) -> Result<Vec<u8>, Error> {
    if value.is_undefined() {
        return Err(Error::new(
            ErrorKind::InvalidOperation,
            "shell filter cannot act on undefined values",
        ));
    }

    let payload = match value.as_bytes() {
        Some(raw) => raw.to_vec(),
        None => value.to_string().into_bytes(),
    };
    Ok(payload)
}

fn run_command(command: &str, input: &[u8]) -> Result<Vec<u8>, CommandFailure> {
let mut cmd = Command::new(SHELL);
cmd.args(SHELL_ARGS)
.arg(command)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped());

let mut child = cmd.spawn().map_err(CommandFailure::Spawn)?;
if let Some(mut stdin) = child.stdin.take() {
stdin.write_all(input).map_err(CommandFailure::Io)?;
}
let output = child.wait_with_output().map_err(CommandFailure::Io)?;
if output.status.success() {
Ok(output.stdout)
} else {
Err(CommandFailure::Exit {
status: output.status.code(),
stderr: output.stderr,
})
}
}
Comment on lines +203 to +250
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Eliminate deadlock when writing to child stdin

Do not block on stdin.write_all before starting readers and before applying the timeout. A child that doesn’t read stdin and writes enough to stdout/stderr can deadlock this thread forever (timeout never triggers). Spawn stdout/stderr readers first and write on a dedicated thread; always join the writer and readers on exit/timeout.

Apply this diff:

@@
-fn run_child(mut command: Command, input: &[u8]) -> Result<Vec<u8>, CommandFailure> {
-    let mut child = command.spawn().map_err(CommandFailure::Spawn)?;
-    let mut broken_pipe = None;
-    if let Some(mut stdin) = child.stdin.take() {
-        match stdin.write_all(input) {
-            Ok(()) => {}
-            Err(err) => {
-                if err.kind() == io::ErrorKind::BrokenPipe {
-                    broken_pipe = Some(err);
-                } else {
-                    return Err(CommandFailure::Io(err));
-                }
-            }
-        }
-    }
-
-    let mut stdout_reader = spawn_pipe_reader(child.stdout.take());
-    let mut stderr_reader = spawn_pipe_reader(child.stderr.take());
+fn run_child(mut command: Command, input: &[u8]) -> Result<Vec<u8>, CommandFailure> {
+    let mut child = command.spawn().map_err(CommandFailure::Spawn)?;
+    // Start readers first to avoid pipe back‑pressure deadlocks.
+    let mut stdout_reader = spawn_pipe_reader(child.stdout.take());
+    let mut stderr_reader = spawn_pipe_reader(child.stderr.take());
+    // Write stdin on a separate thread so a non‑reading child cannot block us.
+    let stdin_handle = child.stdin.take().map(|mut stdin| {
+        let buf = input.to_vec();
+        thread::spawn(move || {
+            let res = stdin.write_all(&buf);
+            drop(stdin); // close to signal EOF
+            res
+        })
+    });
@@
-    let status = match wait_for_exit(&mut child, COMMAND_TIMEOUT) {
+    let status = match wait_for_exit(&mut child, COMMAND_TIMEOUT) {
         Ok(status) => status,
         Err(err) => {
-            let _ = join_reader(stdout_reader.take());
-            let _ = join_reader(stderr_reader.take());
+            let _ = join_reader(stdout_reader.take());
+            let _ = join_reader(stderr_reader.take());
+            if let Some(handle) = stdin_handle {
+                let _ = handle.join();
+            }
             return Err(err);
         }
     };
 
-    let stdout = join_reader(stdout_reader.take()).map_err(CommandFailure::Io)?;
-    let stderr = join_reader(stderr_reader.take()).map_err(CommandFailure::Io)?;
-
-    if let Some(err) = broken_pipe {
-        return Err(CommandFailure::BrokenPipe {
-            source: err,
-            status: status.code(),
-            stderr,
-        });
-    }
+    let stdout = join_reader(stdout_reader.take()).map_err(CommandFailure::Io)?;
+    let stderr = join_reader(stderr_reader.take()).map_err(CommandFailure::Io)?;
+    if let Some(handle) = stdin_handle {
+        match handle.join() {
+            Ok(Ok(())) => {}
+            Ok(Err(err)) => {
+                if err.kind() == io::ErrorKind::BrokenPipe {
+                    return Err(CommandFailure::BrokenPipe {
+                        source: err,
+                        status: status.code(),
+                        stderr,
+                    });
+                }
+                return Err(CommandFailure::Io(err));
+            }
+            Err(_) => return Err(CommandFailure::Io(io::Error::other("stdin writer panicked"))),
+        }
+    }
@@
-        Err(CommandFailure::Exit {
-            status: status.code(),
-            stderr,
-        })
+        Err(CommandFailure::Exit { status: status.code(), stderr })
     }
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
fn run_child(mut command: Command, input: &[u8]) -> Result<Vec<u8>, CommandFailure> {
let mut child = command.spawn().map_err(CommandFailure::Spawn)?;
let mut broken_pipe = None;
if let Some(mut stdin) = child.stdin.take() {
match stdin.write_all(input) {
Ok(()) => {}
Err(err) => {
if err.kind() == io::ErrorKind::BrokenPipe {
broken_pipe = Some(err);
} else {
return Err(CommandFailure::Io(err));
}
}
}
}
let mut stdout_reader = spawn_pipe_reader(child.stdout.take());
let mut stderr_reader = spawn_pipe_reader(child.stderr.take());
let status = match wait_for_exit(&mut child, COMMAND_TIMEOUT) {
Ok(status) => status,
Err(err) => {
let _ = join_reader(stdout_reader.take());
let _ = join_reader(stderr_reader.take());
return Err(err);
}
};
let stdout = join_reader(stdout_reader.take()).map_err(CommandFailure::Io)?;
let stderr = join_reader(stderr_reader.take()).map_err(CommandFailure::Io)?;
if let Some(err) = broken_pipe {
return Err(CommandFailure::BrokenPipe {
source: err,
status: status.code(),
stderr,
});
}
if status.success() {
Ok(stdout)
} else {
Err(CommandFailure::Exit {
status: status.code(),
stderr,
})
}
}
fn run_child(mut command: Command, input: &[u8]) -> Result<Vec<u8>, CommandFailure> {
let mut child = command.spawn().map_err(CommandFailure::Spawn)?;
- let mut broken_pipe = None;
- if let Some(mut stdin) = child.stdin.take() {
- match stdin.write_all(input) {
- Ok(()) => {}
- Err(err) => {
- if err.kind() == io::ErrorKind::BrokenPipe {
- broken_pipe = Some(err);
- } else {
- return Err(CommandFailure::Io(err));
- }
- }
- }
- }
-
- let mut stdout_reader = spawn_pipe_reader(child.stdout.take());
// Start readers first to avoid pipe back-pressure deadlocks.
let mut stdout_reader = spawn_pipe_reader(child.stdout.take());
let mut stderr_reader = spawn_pipe_reader(child.stderr.take());
// Write stdin on a separate thread so a non-reading child cannot block us.
let stdin_handle = child.stdin.take().map(|mut stdin| {
let buf = input.to_vec();
thread::spawn(move || {
let res = stdin.write_all(&buf);
drop(stdin); // close to signal EOF
res
})
});
let status = match wait_for_exit(&mut child, COMMAND_TIMEOUT) {
Ok(status) => status,
Err(err) => {
- let _ = join_reader(stdout_reader.take());
let _ = join_reader(stdout_reader.take());
let _ = join_reader(stderr_reader.take());
if let Some(handle) = stdin_handle {
let _ = handle.join();
}
return Err(err);
}
};
- let stdout = join_reader(stdout_reader.take()).map_err(CommandFailure::Io)?;
- let stderr = join_reader(stderr_reader.take()).map_err(CommandFailure::Io)?;
-
- if let Some(err) = broken_pipe {
- return Err(CommandFailure::BrokenPipe {
- source: err,
- status: status.code(),
- stderr,
- });
let stdout = join_reader(stdout_reader.take()).map_err(CommandFailure::Io)?;
let stderr = join_reader(stderr_reader.take()).map_err(CommandFailure::Io)?;
if let Some(handle) = stdin_handle {
match handle.join() {
Ok(Ok(())) => {}
Ok(Err(err)) => {
if err.kind() == io::ErrorKind::BrokenPipe {
return Err(CommandFailure::BrokenPipe {
source: err,
status: status.code(),
stderr,
});
}
return Err(CommandFailure::Io(err));
}
Err(_) => return Err(CommandFailure::Io(io::Error::other("stdin writer panicked"))),
}
}
if status.success() {
Ok(stdout)
} else {
- Err(CommandFailure::Exit {
- status: status.code(),
- stderr,
Err(CommandFailure::Exit { status: status.code(), stderr })
}
}
🤖 Prompt for AI Agents
In src/stdlib/command.rs around lines 203 to 250, the current run_child
implementation writes to the child's stdin synchronously before spawning
stdout/stderr readers and before applying the timeout, which can deadlock if the
child fills its stdout/stderr; to fix it, first spawn the stdout and stderr
reader threads (spawn_pipe_reader) and then perform the stdin.write_all on a
dedicated writer thread (or spawn_blocking) that captures any BrokenPipe or IO
error into a shared channel/Result; wait for child exit with wait_for_exit as
before, and on exit/timeout always join the writer thread and both reader
threads (join_reader) to ensure they terminate and return their output/errors;
propagate the writer error as BrokenPipe/Io as appropriate, and ensure reader
joins are mapped to CommandFailure::Io when collecting stdout/stderr.


/// Converts captured command output into a template value.
///
/// Valid UTF-8 becomes a string value; any other byte sequence is preserved
/// unchanged as a bytes value.
fn value_from_bytes(bytes: Vec<u8>) -> Value {
    match String::from_utf8(bytes) {
        Ok(text) => Value::from(text),
        // The error hands the original buffer back, so the output never has
        // to be cloned just to keep a fallback copy.
        Err(invalid) => Value::from_bytes(invalid.into_bytes()),
    }
}

fn command_error(err: CommandFailure, template: &str, command: &str) -> Error {
match err {
CommandFailure::Spawn(spawn) => Error::new(
ErrorKind::InvalidOperation,
format!("failed to spawn shell for '{command}' in template '{template}': {spawn}"),
),
CommandFailure::Io(io_err) => Error::new(
ErrorKind::InvalidOperation,
format!("shell command '{command}' in template '{template}' failed: {io_err}"),
),
CommandFailure::Exit { status, stderr } => {
let mut msg = status.map_or_else(
|| {
format!(
"shell command '{command}' in template '{template}' terminated by signal"
)
},
|code| {
format!(
"shell command '{command}' in template '{template}' exited with status {code}"
)
},
);
let stderr = String::from_utf8_lossy(&stderr);
let trimmed = stderr.trim();
if !trimmed.is_empty() {
msg.push_str(": ");
msg.push_str(trimmed);
}
Error::new(ErrorKind::InvalidOperation, msg)
}
}
}

/// Ways in which running a shell command can fail.
enum CommandFailure {
/// The shell process could not be spawned.
Spawn(io::Error),
/// An I/O error occurred while writing stdin or collecting output.
Io(io::Error),
/// The command ran but did not exit successfully.
Exit {
/// Exit code of the process; `None` is reported as "terminated by signal".
status: Option<i32>,
/// Raw bytes captured from the command's stderr.
stderr: Vec<u8>,
},
}

/// Lets `?` convert plain I/O errors into [`CommandFailure::Io`].
impl From<io::Error> for CommandFailure {
/// Wraps `err` in the `Io` variant.
fn from(err: io::Error) -> Self {
Self::Io(err)
}
}
50 changes: 42 additions & 8 deletions src/stdlib/mod.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
//! Standard library registration for `MiniJinja` templates.
//!
//! The module wires the platform-aware file tests, the path manipulation
//! filters, and the collection filters into a single entrypoint so template
//! authors can rely on consistent behaviour across projects. Tests such as
//! `dir`, `file`, and `symlink` inspect metadata without following symlinks,
//! while filters expose conveniences like `basename`, `with_suffix`,
//! `realpath`, content hashing, and collection utilities including
//! `flatten`, `group_by`, and `uniq`.
//! filters, the collection helpers, the network utilities, and the command
//! wrappers into a single entrypoint so template authors can rely on
//! consistent behaviour across projects. Tests such as `dir`, `file`, and
//! `symlink` inspect metadata without following symlinks, while filters
//! expose conveniences like `basename`, `with_suffix`, `realpath`, content
//! hashing, collection utilities including `flatten`, `group_by`, and `uniq`,
//! HTTP helpers like `fetch`, and shell bridges such as `shell` and `grep`.

mod collections;
mod command;
mod network;
mod path;
mod time;

Expand All @@ -17,9 +20,36 @@ use cap_std::fs;
#[cfg(unix)]
use cap_std::fs::FileTypeExt;
use minijinja::{Environment, Error, value::Value};
use std::{
sync::Arc,
sync::atomic::{AtomicBool, Ordering},
};

type FileTest = (&'static str, fn(fs::FileType) -> bool);

/// Captures mutable state shared between stdlib helpers.
///
/// The state is cheap to clone: clones share the same underlying flag, so a
/// change made through one handle is visible through all of them.
#[derive(Clone, Default, Debug)]
pub struct StdlibState {
// Shared impurity marker; starts out `false` and is set to `true` by the
// `shell` and `grep` filters when they run.
impure: Arc<AtomicBool>,
}

impl StdlibState {
/// Returns whether any impure helper has executed since the state was
/// created or since [`reset_impure`](Self::reset_impure) was last called.
#[must_use]
pub fn is_impure(&self) -> bool {
self.impure.load(Ordering::Relaxed)
}

/// Clears the impurity marker so callers can track helper usage per render.
///
/// The marker is never cleared automatically; call this before a render to
/// observe only the helpers that render triggers.
pub fn reset_impure(&self) {
self.impure.store(false, Ordering::Relaxed);
}

/// Returns another handle to the shared impurity flag, for helpers that
/// need to set it themselves.
pub(crate) fn impure_flag(&self) -> Arc<AtomicBool> {
Arc::clone(&self.impure)
}
}

/// Register standard library helpers with the `MiniJinja` environment.
///
/// # Examples
Expand All @@ -28,19 +58,23 @@ type FileTest = (&'static str, fn(fs::FileType) -> bool);
/// use netsuke::stdlib;
///
/// let mut env = Environment::new();
/// stdlib::register(&mut env);
/// let _state = stdlib::register(&mut env);
/// env.add_template("t", "{{ path | basename }}").expect("add template");
/// let tmpl = env.get_template("t").expect("get template");
/// let rendered = tmpl
/// .render(context!(path => "foo/bar.txt"))
/// .expect("render");
/// assert_eq!(rendered, "bar.txt");
/// ```
pub fn register(env: &mut Environment<'_>) {
pub fn register(env: &mut Environment<'_>) -> StdlibState {
let state = StdlibState::default();
register_file_tests(env);
path::register_filters(env);
collections::register_filters(env);
network::register_functions(env);
command::register(env, state.impure_flag());
time::register_functions(env);
state
}

fn register_file_tests(env: &mut Environment<'_>) {
Expand Down
Loading
Loading