From 84d76f35c4ed36bd88c436ec0cf6f61995d20e51 Mon Sep 17 00:00:00 2001
From: Burke Libbey
Date: Fri, 5 Dec 2025 15:17:00 -0500
Subject: [PATCH 1/6] Add world builtins for native git repository access
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce nine new builtins that provide native access to files from a
git repository during Nix evaluation, replacing the need to shell out to
git via derivations:

- builtins.unsafeTectonixInternalManifest: Get zone path -> zone ID mapping
- builtins.unsafeTectonixInternalManifestInverted: Get zone ID -> zone path mapping
- builtins.unsafeTectonixInternalTreeSha: Get tree SHA for a world path
- builtins.unsafeTectonixInternalTree: Fetch a tree by SHA as a store path
- builtins.unsafeTectonixInternalFile: Read file contents from world repository
- builtins.unsafeTectonixInternalZoneSrc: Get zone source as store path
- builtins.unsafeTectonixInternalDir: List directory contents
- builtins.unsafeTectonixInternalSparseCheckoutRoots: Get zone IDs in sparse checkout
- builtins.unsafeTectonixInternalDirtyZones: Get zone path -> dirty status mapping

New CLI flags:
- --tectonix-git-dir: Path to git directory
- --tectonix-git-sha: Git commit SHA to use
- --tectonix-checkout-path: Optional checkout for source-available mode

Features:
- Lazy initialization of git resources
- Tree SHA caching at each path level
- Source-available mode for local development (prefers checkout files)
- Zone dirty detection: computes dirty status for all sparse-checked-out
  zones by walking dirty files and matching against zone paths
- ODB-only mode: bypasses full repository validation to support repos
  with unsupported extensions (e.g., reftables)
- Sparse checkout awareness: reads .git/info/sparse-checkout-roots to
  determine which zones are checked out (supports gitdir worktrees)

Also fixes GitSourceAccessor::readDirectory to return file types instead
of always returning unknown.
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/libexpr/eval.cc | 233 +++++++++ src/libexpr/include/nix/expr/eval-settings.hh | 35 ++ src/libexpr/include/nix/expr/eval.hh | 41 ++ src/libexpr/primops/meson.build | 1 + src/libexpr/primops/tectonix.cc | 493 ++++++++++++++++++ src/libfetchers/git-utils.cc | 97 +++- .../include/nix/fetchers/git-utils.hh | 17 + 7 files changed, 916 insertions(+), 1 deletion(-) create mode 100644 src/libexpr/primops/tectonix.cc diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 46393b79c5e..218ff0db549 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -24,6 +24,7 @@ #include "nix/fetchers/fetch-to-store.hh" #include "nix/fetchers/tarball.hh" #include "nix/fetchers/input-cache.hh" +#include "nix/fetchers/git-utils.hh" #include "nix/util/current-process.hh" #include "nix/store/async-path-writer.hh" #include "nix/expr/parallel-eval.hh" @@ -330,6 +331,7 @@ EvalState::EvalState( , importResolutionCache(make_ref()) , fileEvalCache(make_ref()) , regexCache(makeRegexCache()) + , worldTreeShaCache(make_ref()) #if NIX_USE_BOEHMGC , baseEnvP(std::allocate_shared(traceable_allocator(), &mem.allocEnv(BASE_ENV_SIZE))) , baseEnv(**baseEnvP) @@ -422,6 +424,237 @@ void EvalState::allowAndSetStorePathString(const StorePath & storePath, Value & mkStorePathString(storePath, v); } +ref EvalState::getWorldRepo() const +{ + if (!worldRepo) { + auto gitDir = settings.tectonixGitDir.get(); + if (gitDir.empty()) + throw Error("--tectonix-git-dir must be specified to use tectonix builtins"); + + // Expand ~ to home directory + if (hasPrefix(gitDir, "~/")) + gitDir = getHome() + gitDir.substr(1); + + worldRepo = GitRepo::openRepo(std::filesystem::path(gitDir), {.bare = true}); + } + return *worldRepo; +} + +ref EvalState::getWorldGitAccessor() const +{ + if (!worldGitAccessor) { + auto sha = settings.tectonixGitSha.get(); + if (sha.empty()) + throw Error("--tectonix-git-sha must be specified to use 
tectonix builtins"); + + auto repo = getWorldRepo(); + auto hash = Hash::parseNonSRIUnprefixed(sha, HashAlgorithm::SHA1); + + if (!repo->hasObject(hash)) + throw Error("tectonix-git-sha '%s' not found in repository", sha); + + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + worldGitAccessor = repo->getAccessor(hash, opts, "world"); + } + return *worldGitAccessor; +} + +std::optional> EvalState::getWorldCheckoutAccessor() const +{ + if (!isTectonixSourceAvailable()) + return std::nullopt; + + if (!worldCheckoutAccessor) { + auto checkoutPath = settings.tectonixCheckoutPath.get(); + // Use the global filesystem accessor with the checkout path as root + worldCheckoutAccessor = getFSSourceAccessor(); + } + return *worldCheckoutAccessor; +} + +bool EvalState::isTectonixSourceAvailable() const +{ + return !settings.tectonixCheckoutPath.get().empty(); +} + +Hash EvalState::getWorldTreeSha(std::string_view worldPath) const +{ + // Normalize path (remove leading //) + std::string path(worldPath); + if (hasPrefix(path, "//")) + path = path.substr(2); + + // Check cache first + if (auto cached = getConcurrent(*worldTreeShaCache, path)) + return *cached; + + // Compute by walking from root + auto repo = getWorldRepo(); + auto sha = settings.tectonixGitSha.get(); + auto commitSha = Hash::parseNonSRIUnprefixed(sha, HashAlgorithm::SHA1); + + // Get the root tree SHA from the commit + auto rootTreeSha = repo->getCommitTree(commitSha); + + // Walk path components, caching intermediate results + Hash currentSha = rootTreeSha; + std::string currentPath; + + // Create an accessor for path validation + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + auto accessor = repo->getAccessor(commitSha, opts, "world-tree"); + + for (auto & component : tokenizeString>(path, "/")) { + if (component.empty()) continue; + + std::string nextPath = currentPath.empty() ? 
component : currentPath + "/" + component; + + // Check if this level is cached + if (auto cached = getConcurrent(*worldTreeShaCache, nextPath)) { + currentSha = *cached; + currentPath = nextPath; + continue; + } + + // Need to compute: get tree entry for this component + auto fullPath = CanonPath("/" + nextPath); + auto stat = accessor->maybeLstat(fullPath); + + if (!stat || stat->type != SourceAccessor::Type::tDirectory) + throw Error("path '%s' does not exist or is not a directory in world", nextPath); + + // Get the tree SHA for this subtree + currentSha = repo->getSubtreeSha(currentSha, component); + + // Cache this level + worldTreeShaCache->try_emplace(nextPath, currentSha); + currentPath = nextPath; + } + + return currentSha; +} + +const std::set & EvalState::getTectonixSparseCheckoutRoots() const +{ + if (tectonixSparseCheckoutRoots) + return *tectonixSparseCheckoutRoots; + + std::set roots; + + if (isTectonixSourceAvailable()) { + auto checkoutPath = settings.tectonixCheckoutPath.get(); + + // Read .git to find the actual git directory + // It can be either a directory or a file containing "gitdir: " + auto dotGitPath = std::filesystem::path(checkoutPath) / ".git"; + std::filesystem::path gitDir; + + if (std::filesystem::is_directory(dotGitPath)) { + gitDir = dotGitPath; + } else if (std::filesystem::is_regular_file(dotGitPath)) { + auto gitdirContent = readFile(dotGitPath.string()); + // Parse "gitdir: \n" + if (hasPrefix(gitdirContent, "gitdir: ")) { + auto path = trim(gitdirContent.substr(8)); + gitDir = std::filesystem::path(path); + // Handle relative paths + if (gitDir.is_relative()) + gitDir = std::filesystem::path(checkoutPath) / gitDir; + } + } + + if (!gitDir.empty()) { + // Read sparse-checkout-roots + auto sparseRootsPath = gitDir / "info" / "sparse-checkout-roots"; + if (std::filesystem::exists(sparseRootsPath)) { + auto content = readFile(sparseRootsPath.string()); + for (auto & line : tokenizeString>(content, "\n")) { + auto trimmed = 
trim(line); + if (!trimmed.empty()) + roots.insert(std::string(trimmed)); + } + } + } + } + + tectonixSparseCheckoutRoots = std::move(roots); + return *tectonixSparseCheckoutRoots; +} + +const std::map & EvalState::getTectonixDirtyZones() const +{ + if (tectonixDirtyZones) + return *tectonixDirtyZones; + + std::map dirtyZones; + + if (isTectonixSourceAvailable()) { + // Get sparse checkout roots (zone IDs) + auto & sparseRoots = getTectonixSparseCheckoutRoots(); + + if (!sparseRoots.empty()) { + // Read manifest to get zone ID -> zone path mapping + auto gitDir = settings.tectonixGitDir.get(); + if (hasPrefix(gitDir, "~/")) + gitDir = getHome() + gitDir.substr(1); + + auto sha = settings.tectonixGitSha.get(); + if (!gitDir.empty() && !sha.empty()) { + auto repo = getWorldRepo(); + auto hash = Hash::parseNonSRIUnprefixed(sha, HashAlgorithm::SHA1); + + if (repo->hasObject(hash)) { + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + auto accessor = repo->getAccessor(hash, opts, "world"); + + auto manifestPath = CanonPath("/.meta/manifest.json"); + if (accessor->pathExists(manifestPath)) { + auto manifestContent = accessor->readFile(manifestPath); + auto manifest = nlohmann::json::parse(manifestContent); + + // Build map of zone ID -> zone path for sparse roots only + std::map zoneIdToPath; + for (auto & [path, value] : manifest.items()) { + auto & id = value.at("id").get_ref(); + if (sparseRoots.count(id)) + zoneIdToPath[id] = path; + } + + // Initialize all sparse-checked-out zones as not dirty + for (auto & [zoneId, zonePath] : zoneIdToPath) { + dirtyZones[zonePath] = false; + } + + // Get dirty files by diffing tree against workdir (works with ODB-only repo) + auto checkoutPath = settings.tectonixCheckoutPath.get(); + auto dirtyFiles = repo->getDirtyFilesAgainstTree(hash, checkoutPath); + + for (const auto & dirtyFile : dirtyFiles) { + auto filePath = dirtyFile.abs(); + + // Find which zone this file belongs to + for (auto & [zonePath, dirty] : 
dirtyZones) { + // Normalize zone path for comparison (remove leading //) + std::string normalizedZonePath = zonePath; + if (hasPrefix(normalizedZonePath, "//")) + normalizedZonePath = normalizedZonePath.substr(1); // keep one / + + if (hasPrefix(filePath, normalizedZonePath + "/") || filePath == normalizedZonePath) { + dirtyZones[zonePath] = true; + break; + } + } + } + } + } + } + } + } + + tectonixDirtyZones = std::move(dirtyZones); + return *tectonixDirtyZones; +} + inline static bool isJustSchemePrefix(std::string_view prefix) { return !prefix.empty() && prefix[prefix.size() - 1] == ':' diff --git a/src/libexpr/include/nix/expr/eval-settings.hh b/src/libexpr/include/nix/expr/eval-settings.hh index f367541ec2f..58138e86fbb 100644 --- a/src/libexpr/include/nix/expr/eval-settings.hh +++ b/src/libexpr/include/nix/expr/eval-settings.hh @@ -399,6 +399,41 @@ struct EvalSettings : Config Note that enabling the debugger (`--debugger`) disables multi-threaded evaluation. )"}; + + Setting tectonixGitDir{ + this, + "", + "tectonix-git-dir", + R"( + Path to the git directory for tectonix builtins (e.g., `~/world/git`). + + This enables the tectonix builtins (`builtins.unsafeTectonixInternalTreeSha`, `builtins.unsafeTectonixInternalTree`, + `builtins.unsafeTectonixInternalFile`, `builtins.unsafeTectonixInternalZoneSrc`, `builtins.unsafeTectonixInternalDir`) which provide + native access to files from a git repository during Nix evaluation. + )"}; + + Setting tectonixGitSha{ + this, + "", + "tectonix-git-sha", + R"( + Git commit SHA to use for tectonix builtins. + + This specifies the commit to read from when using tectonix builtins. + Typically set to HEAD of the repository. + )"}; + + Setting tectonixCheckoutPath{ + this, + "", + "tectonix-checkout-path", + R"( + Path to checkout directory for source-available mode. + + When set, uncommitted files in the checkout are preferred over git content + for tectonix builtins. 
This enables local development workflows where changes + are visible before committing. + )"}; }; /** diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index c9cfb1a573b..8880499a03b 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -26,6 +26,7 @@ #include #include +#include #include namespace nix { @@ -52,6 +53,7 @@ enum RepairFlag : bool; struct MemorySourceAccessor; struct MountedSourceAccessor; struct AsyncPathWriter; +struct GitRepo; namespace eval_cache { class EvalCache; @@ -513,6 +515,24 @@ private: */ const ref regexCache; + /** Lazy-initialized git repository for world builtins */ + mutable std::optional> worldRepo; + + /** Lazy-initialized source accessor for world git content */ + mutable std::optional> worldGitAccessor; + + /** Lazy-initialized source accessor for world checkout (source-available mode) */ + mutable std::optional> worldCheckoutAccessor; + + /** Cache: world path → tree SHA (lazy computed, cached at each path level) */ + const ref> worldTreeShaCache; + + /** Lazy-initialized set of zone IDs in sparse checkout */ + mutable std::optional> tectonixSparseCheckoutRoots; + + /** Lazy-initialized map of zone path → dirty status (only for sparse-checked-out zones) */ + mutable std::optional> tectonixDirtyZones; + public: /** @@ -544,6 +564,27 @@ public: return lookupPath; } + /** Get the world git repository, initializing lazily */ + ref getWorldRepo() const; + + /** Get accessor for world git content at worldSha */ + ref getWorldGitAccessor() const; + + /** Get accessor for world checkout (only in source-available mode) */ + std::optional> getWorldCheckoutAccessor() const; + + /** Get tree SHA for a world path, with lazy caching */ + Hash getWorldTreeSha(std::string_view worldPath) const; + + /** Check if we're in source-available mode */ + bool isTectonixSourceAvailable() const; + + /** Get set of zone IDs in sparse checkout (source-available mode only) */ + 
const std::set & getTectonixSparseCheckoutRoots() const; + + /** Get map of zone path → dirty status (only for sparse-checked-out zones) */ + const std::map & getTectonixDirtyZones() const; + /** * Return a `SourcePath` that refers to `path` in the root * filesystem. diff --git a/src/libexpr/primops/meson.build b/src/libexpr/primops/meson.build index b8abc6409af..5d948a49c3f 100644 --- a/src/libexpr/primops/meson.build +++ b/src/libexpr/primops/meson.build @@ -9,4 +9,5 @@ sources += files( 'fetchMercurial.cc', 'fetchTree.cc', 'fromTOML.cc', + 'tectonix.cc', ) diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc new file mode 100644 index 00000000000..29025ad0be3 --- /dev/null +++ b/src/libexpr/primops/tectonix.cc @@ -0,0 +1,493 @@ +#include "nix/expr/primops.hh" +#include "nix/expr/eval-inline.hh" +#include "nix/expr/eval-settings.hh" +#include "nix/fetchers/git-utils.hh" +#include "nix/store/store-api.hh" +#include "nix/fetchers/fetch-to-store.hh" + +#include + +namespace nix { + +// Helper to read the manifest JSON content +static std::string readManifestContent(EvalState & state, const PosIdx pos) +{ + auto fullPath = CanonPath("/.meta/manifest.json"); + + // In source-available mode, check checkout first + if (state.isTectonixSourceAvailable()) { + auto checkoutAccessor = state.getWorldCheckoutAccessor(); + if (checkoutAccessor) { + auto checkoutPath = state.settings.tectonixCheckoutPath.get(); + auto checkoutFullPath = CanonPath(checkoutPath + fullPath.abs()); + if ((*checkoutAccessor)->pathExists(checkoutFullPath)) { + return (*checkoutAccessor)->readFile(checkoutFullPath); + } + } + } + + // Fall back to git + auto accessor = state.getWorldGitAccessor(); + if (!accessor->pathExists(fullPath)) + state.error("manifest.json does not exist at //.meta/manifest.json in world") + .atPos(pos).debugThrow(); + + return accessor->readFile(fullPath); +} + +// ============================================================================ +// 
builtins.worldManifest +// Returns path -> zoneId mapping from //.meta/manifest.json +// ============================================================================ +static void prim_worldManifest(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto content = readManifestContent(state, pos); + auto json = nlohmann::json::parse(content); + + auto attrs = state.buildBindings(json.size()); + for (auto & [path, value] : json.items()) { + auto & id = value.at("id"); + attrs.alloc(state.symbols.create(path)).mkString(id.get(), state.mem); + } + v.mkAttrs(attrs); +} + +static RegisterPrimOp primop_worldManifest({ + .name = "__unsafeTectonixInternalManifest", + .args = {}, + .doc = R"( + Get the world manifest as a Nix attrset mapping zone paths to zone IDs. + + Example: `builtins.unsafeTectonixInternalManifest."//areas/tools/dev"` returns `"W-123456"`. + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. + )", + .fun = prim_worldManifest, +}); + +// ============================================================================ +// builtins.worldManifestInverted +// Returns zoneId -> path mapping (inverse of worldManifest) +// ============================================================================ +static void prim_worldManifestInverted(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto content = readManifestContent(state, pos); + auto json = nlohmann::json::parse(content); + + auto attrs = state.buildBindings(json.size()); + for (auto & [path, value] : json.items()) { + auto & id = value.at("id"); + attrs.alloc(state.symbols.create(id.get())).mkString(path, state.mem); + } + v.mkAttrs(attrs); +} + +static RegisterPrimOp primop_worldManifestInverted({ + .name = "__unsafeTectonixInternalManifestInverted", + .args = {}, + .doc = R"( + Get the inverted world manifest as a Nix attrset mapping zone IDs to zone paths. 
+ + Example: `builtins.unsafeTectonixInternalManifestInverted."W-123456"` returns `"//areas/tools/dev"`. + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. + )", + .fun = prim_worldManifestInverted, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalTreeSha worldPath +// Returns the git tree SHA for a world path +// ============================================================================ +static void prim_unsafeTectonixInternalTreeSha(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto worldPath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'worldPath' argument to builtins.unsafeTectonixInternalTreeSha"); + + auto sha = state.getWorldTreeSha(worldPath); + v.mkString(sha.gitRev(), state.mem); +} + +static RegisterPrimOp primop_unsafeTectonixInternalTreeSha({ + .name = "__unsafeTectonixInternalTreeSha", + .args = {"worldPath"}, + .doc = R"( + Get the git tree SHA for a path in the world repository. + + Example: `builtins.unsafeTectonixInternalTreeSha "//areas/tools/tec"` returns the tree SHA + for that zone. + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. 
+ )", + .fun = prim_unsafeTectonixInternalTreeSha, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalTree treeSha +// Returns a store path containing the tree contents +// ============================================================================ +static void prim_unsafeTectonixInternalTree(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto treeSha = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'treeSha' argument to builtins.unsafeTectonixInternalTree"); + + auto repo = state.getWorldRepo(); + auto hash = Hash::parseNonSRIUnprefixed(treeSha, HashAlgorithm::SHA1); + + if (!repo->hasObject(hash)) + state.error("tree SHA '%s' not found in world repository", treeSha) + .atPos(pos).debugThrow(); + + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + auto accessor = repo->getAccessor(hash, opts, "world-tree"); + + auto storePath = fetchToStore( + state.fetchSettings, + *state.store, + SourcePath(accessor, CanonPath::root), + FetchMode::Copy, + "world-tree-" + std::string(treeSha).substr(0, 8)); + + state.allowAndSetStorePathString(storePath, v); +} + +static RegisterPrimOp primop_unsafeTectonixInternalTree({ + .name = "__unsafeTectonixInternalTree", + .args = {"treeSha"}, + .doc = R"( + Fetch a git tree by SHA from the world repository and return it as a store path. + + Example: `builtins.unsafeTectonixInternalTree "abc123..."` returns `/nix/store/...-world-tree-abc123`. + + Requires `--tectonix-git-dir` to be set. 
+ )", + .fun = prim_unsafeTectonixInternalTree, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalFile path +// Returns file contents as a string +// ============================================================================ +static void prim_unsafeTectonixInternalFile(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto worldPath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'path' argument to builtins.unsafeTectonixInternalFile"); + + // Normalize path (remove leading //) + std::string path(worldPath); + if (hasPrefix(path, "//")) + path = path.substr(2); + + auto fullPath = CanonPath("/" + path); + + // In source-available mode, check checkout first + if (state.isTectonixSourceAvailable()) { + auto checkoutAccessor = state.getWorldCheckoutAccessor(); + if (checkoutAccessor) { + auto checkoutPath = state.settings.tectonixCheckoutPath.get(); + auto checkoutFullPath = CanonPath(checkoutPath + fullPath.abs()); + if ((*checkoutAccessor)->pathExists(checkoutFullPath)) { + auto content = (*checkoutAccessor)->readFile(checkoutFullPath); + v.mkString(content, state.mem); + return; + } + } + } + + // Fall back to git + auto accessor = state.getWorldGitAccessor(); + if (!accessor->pathExists(fullPath)) + state.error("path '%s' does not exist in world", fullPath) + .atPos(pos).debugThrow(); + + auto content = accessor->readFile(fullPath); + v.mkString(content, state.mem); +} + +static RegisterPrimOp primop_unsafeTectonixInternalFile({ + .name = "__unsafeTectonixInternalFile", + .args = {"path"}, + .doc = R"( + Read a file from the world repository. + + In source-available mode (--tectonix-checkout-path set), prefers checkout files. + + Example: `builtins.unsafeTectonixInternalFile "//areas/tools/tec/zone.nix"` + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. 
+ )", + .fun = prim_unsafeTectonixInternalFile, +}); + +// ============================================================================ +// builtins.worldZoneFile zonePath pathInZone +// Returns file contents as a string +// ============================================================================ +static void prim_worldZoneFile(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'zonePath' argument to builtins.worldZoneFile"); + auto pathInZone = state.forceStringNoCtx(*args[1], pos, + "while evaluating the 'pathInZone' argument to builtins.worldZoneFile"); + + // Normalize zone path (remove leading //) + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + auto fullPath = CanonPath("/" + zone + "/" + std::string(pathInZone)); + + // In source-available mode, check checkout first + if (state.isTectonixSourceAvailable()) { + auto checkoutAccessor = state.getWorldCheckoutAccessor(); + if (checkoutAccessor) { + auto checkoutPath = state.settings.tectonixCheckoutPath.get(); + auto checkoutFullPath = CanonPath(checkoutPath + fullPath.abs()); + if ((*checkoutAccessor)->pathExists(checkoutFullPath)) { + auto content = (*checkoutAccessor)->readFile(checkoutFullPath); + v.mkString(content, state.mem); + return; + } + } + } + + // Fall back to git + auto accessor = state.getWorldGitAccessor(); + if (!accessor->pathExists(fullPath)) + state.error("path '%s' does not exist in world", fullPath) + .atPos(pos).debugThrow(); + + auto content = accessor->readFile(fullPath); + v.mkString(content, state.mem); +} + +static RegisterPrimOp primop_worldZoneFile({ + .name = "worldZoneFile", + .args = {"zonePath", "pathInZone"}, + .doc = R"( + Read a file from a zone in the world repository. + + In source-available mode (--tectonix-checkout-path set), prefers checkout files. 
+ + Example: `builtins.worldZoneFile "//areas/tools/tec" "zone.nix"` + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. + )", + .fun = prim_worldZoneFile, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalZoneSrc zonePath +// Returns a store path containing the zone source +// ============================================================================ +static void prim_unsafeTectonixInternalZoneSrc(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'zonePath' argument to builtins.unsafeTectonixInternalZoneSrc"); + + // Normalize zone path + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + auto fullPath = CanonPath("/" + zone); + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + + // In source-available mode with dirty zone, use checkout + auto & dirtyZones = state.getTectonixDirtyZones(); + auto it = dirtyZones.find(std::string(zonePath)); + bool isDirty = it != dirtyZones.end() && it->second; + if (state.isTectonixSourceAvailable() && isDirty) { + auto checkoutAccessor = state.getWorldCheckoutAccessor(); + if (!checkoutAccessor) + state.error("checkout accessor not available").atPos(pos).debugThrow(); + + auto checkoutPath = state.settings.tectonixCheckoutPath.get(); + auto checkoutFullPath = CanonPath(checkoutPath + fullPath.abs()); + + auto storePath = fetchToStore( + state.fetchSettings, + *state.store, + SourcePath(*checkoutAccessor, checkoutFullPath), + FetchMode::Copy, + name); + + state.allowAndSetStorePathString(storePath, v); + } else { + // Use git content + auto treeSha = state.getWorldTreeSha(zonePath); + auto repo = state.getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto accessor = repo->getAccessor(treeSha, opts, "world-zone"); + + auto storePath = fetchToStore( + 
state.fetchSettings, + *state.store, + SourcePath(accessor, CanonPath::root), + FetchMode::Copy, + name); + + state.allowAndSetStorePathString(storePath, v); + } +} + +static RegisterPrimOp primop_unsafeTectonixInternalZoneSrc({ + .name = "__unsafeTectonixInternalZoneSrc", + .args = {"zonePath"}, + .doc = R"( + Get the source of a zone as a store path. + + In source-available mode with uncommitted changes, uses checkout content. + Otherwise uses git content. + + Example: `builtins.unsafeTectonixInternalZoneSrc "//areas/tools/tec"` + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. + )", + .fun = prim_unsafeTectonixInternalZoneSrc, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalDir zonePath pathInZone +// Returns directory listing as attrset +// ============================================================================ +static void prim_unsafeTectonixInternalDir(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'zonePath' argument to builtins.unsafeTectonixInternalDir"); + auto pathInZone = state.forceStringNoCtx(*args[1], pos, + "while evaluating the 'pathInZone' argument to builtins.unsafeTectonixInternalDir"); + + // Normalize path + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + auto fullPath = CanonPath("/" + zone + "/" + std::string(pathInZone)); + + // Determine which accessor to use + ref accessor = state.getWorldGitAccessor(); + CanonPath accessPath = fullPath; + + if (state.isTectonixSourceAvailable()) { + auto checkoutAccessor = state.getWorldCheckoutAccessor(); + if (checkoutAccessor) { + auto checkoutPath = state.settings.tectonixCheckoutPath.get(); + auto checkoutFullPath = CanonPath(checkoutPath + fullPath.abs()); + if ((*checkoutAccessor)->pathExists(checkoutFullPath)) { + accessor = *checkoutAccessor; + accessPath = 
checkoutFullPath; + } + } + } + + if (!accessor->pathExists(accessPath)) + state.error("path '%s' does not exist in world", fullPath) + .atPos(pos).debugThrow(); + + auto entries = accessor->readDirectory(accessPath); + + auto attrs = state.buildBindings(entries.size()); + for (auto & [name, typeOpt] : entries) { + const char * typeStr; + if (!typeOpt) { + typeStr = "unknown"; + } else { + switch (*typeOpt) { + case SourceAccessor::Type::tRegular: typeStr = "regular"; break; + case SourceAccessor::Type::tDirectory: typeStr = "directory"; break; + case SourceAccessor::Type::tSymlink: typeStr = "symlink"; break; + case SourceAccessor::Type::tChar: + case SourceAccessor::Type::tBlock: + case SourceAccessor::Type::tSocket: + case SourceAccessor::Type::tFifo: + case SourceAccessor::Type::tUnknown: + typeStr = "unknown"; break; + } + } + attrs.alloc(state.symbols.create(name)).mkString(typeStr, state.mem); + } + v.mkAttrs(attrs); +} + +static RegisterPrimOp primop_unsafeTectonixInternalDir({ + .name = "__unsafeTectonixInternalDir", + .args = {"zonePath", "pathInZone"}, + .doc = R"( + List directory contents from the world repository. + + Returns an attrset mapping names to types ("regular", "directory", "symlink"). + + Example: `builtins.unsafeTectonixInternalDir "//areas/tools/tec" "src"` + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. 
+ )", + .fun = prim_unsafeTectonixInternalDir, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalSparseCheckoutRoots +// Returns list of zone IDs in sparse checkout +// ============================================================================ +static void prim_unsafeTectonixInternalSparseCheckoutRoots(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto & roots = state.getTectonixSparseCheckoutRoots(); + + auto list = state.buildList(roots.size()); + size_t i = 0; + for (const auto & root : roots) { + (list[i++] = state.allocValue())->mkString(root, state.mem); + } + v.mkList(list); +} + +static RegisterPrimOp primop_unsafeTectonixInternalSparseCheckoutRoots({ + .name = "__unsafeTectonixInternalSparseCheckoutRoots", + .args = {}, + .doc = R"( + Get the list of zone IDs that are in the sparse checkout. + + Returns an empty list if not in source-available mode or if no + sparse-checkout-roots file exists. + + Example: `builtins.unsafeTectonixInternalSparseCheckoutRoots` returns `["W-000000" "W-1337af" ...]`. + + Requires `--tectonix-checkout-path` to be set. 
+ )", + .fun = prim_unsafeTectonixInternalSparseCheckoutRoots, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalDirtyZones +// Returns map of zone paths to dirty status +// ============================================================================ +static void prim_unsafeTectonixInternalDirtyZones(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto & dirtyZones = state.getTectonixDirtyZones(); + + auto attrs = state.buildBindings(dirtyZones.size()); + for (const auto & [zonePath, dirty] : dirtyZones) { + attrs.alloc(state.symbols.create(zonePath)).mkBool(dirty); + } + v.mkAttrs(attrs); +} + +static RegisterPrimOp primop_unsafeTectonixInternalDirtyZones({ + .name = "__unsafeTectonixInternalDirtyZones", + .args = {}, + .doc = R"( + Get the dirty status of zones in the sparse checkout. + + Returns an attrset mapping zone paths to booleans indicating whether + the zone has uncommitted changes. + + Only includes zones that are in the sparse checkout. + + Example: `builtins.unsafeTectonixInternalDirtyZones."//areas/tools/dev"` returns `true` or `false`. + + Requires `--tectonix-checkout-path` to be set. + )", + .fun = prim_unsafeTectonixInternalDirtyZones, +}); + +} // namespace nix diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index f21313a1040..3028a76eeba 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -108,6 +109,13 @@ static void initLibGit2() std::call_once(initialized, []() { if (git_libgit2_init() < 0) throw Error("initialising libgit2: %s", git_error_last()->message); + + // Register support for additional git extensions. + // This allows opening repos with extensions that libgit2 doesn't natively support, + // as long as we don't actually need the extension's functionality. 
+ // "refstorage" is used by reftables - we can ignore it since we only access objects by SHA. + const char * extensions[] = { "refstorage" }; + git_libgit2_opts(GIT_OPT_SET_EXTENSIONS, extensions, 1); }); } @@ -265,6 +273,23 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this { initLibGit2(); + if (options.odbOnly) { + /* Open only the object database, bypassing full repository validation. + This is useful for repositories with unsupported extensions like reftables. + We create a fake repository wrapping the ODB for API compatibility. */ + + git_odb * odb = nullptr; + if (git_odb_open(&odb, (path / "objects").string().c_str())) + throw Error("opening Git object database %s: %s", path / "objects", git_error_last()->message); + + // git_repository_wrap_odb takes ownership of the ODB + if (git_repository_wrap_odb(Setter(repo), odb)) + throw Error("wrapping Git object database: %s", git_error_last()->message); + + // No mempack backend needed for read-only ODB access + return; + } + initRepoAtomically(path, options); if (git_repository_open(Setter(repo), path.string().c_str())) throw Error("opening Git repository %s: %s", path, git_error_last()->message); @@ -595,6 +620,66 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this return true; } + Hash getSubtreeSha(const Hash & treeSha, const std::string & entryName) override + { + git_tree * tree = nullptr; + auto oid = hashToOID(treeSha); + + if (git_tree_lookup(&tree, *this, &oid)) + throw Error("looking up tree %s: %s", treeSha.gitRev(), git_error_last()->message); + + Finally freeTree([&]() { git_tree_free(tree); }); + + auto entry = git_tree_entry_byname(tree, entryName.c_str()); + if (!entry) + throw Error("entry '%s' not found in tree %s", entryName, treeSha.gitRev()); + + return toHash(*git_tree_entry_id(entry)); + } + + Hash getCommitTree(const Hash & commitSha) override + { + auto oid = hashToOID(commitSha); + auto obj = lookupObject(*this, oid); + auto tree = peelObject(obj.get(), 
GIT_OBJECT_TREE); + return toHash(*git_object_id(tree.get())); + } + + std::set getDirtyFilesAgainstTree(const Hash & commitSha, const std::filesystem::path & workdirPath) override + { + std::set dirtyFiles; + + // Set workdir on the repo + if (git_repository_set_workdir(repo.get(), workdirPath.string().c_str(), 0)) + throw Error("setting workdir on repository: %s", git_error_last()->message); + + // Get tree from commit + auto oid = hashToOID(commitSha); + auto obj = lookupObject(*this, oid); + auto tree = peelObject(obj.get(), GIT_OBJECT_TREE); + + // Create diff between tree and workdir, using index for faster stat cache + git_diff * diff = nullptr; + git_diff_options opts = GIT_DIFF_OPTIONS_INIT; + opts.flags = GIT_DIFF_INCLUDE_UNTRACKED | GIT_DIFF_RECURSE_UNTRACKED_DIRS; + + if (git_diff_tree_to_workdir_with_index(&diff, repo.get(), tree.get(), &opts)) + throw Error("creating diff: %s", git_error_last()->message); + + // Collect dirty file paths + size_t numDeltas = git_diff_num_deltas(diff); + for (size_t i = 0; i < numDeltas; i++) { + const git_diff_delta * delta = git_diff_get_delta(diff, i); + // Use new_file path for adds/modifies, old_file for deletes + const char * path = delta->new_file.path ? delta->new_file.path : delta->old_file.path; + if (path) + dirtyFiles.insert(CanonPath(path)); + } + + git_diff_free(diff); + return dirtyFiles; + } + /** * A 'GitSourceAccessor' with no regard for export-ignore. 
*/ @@ -876,8 +961,18 @@ struct GitSourceAccessor : SourceAccessor for (size_t n = 0; n < count; ++n) { auto entry = git_tree_entry_byindex(tree.get(), n); + auto mode = git_tree_entry_filemode(entry); + std::optional type; + if (mode == GIT_FILEMODE_TREE) + type = Type::tDirectory; + else if (mode == GIT_FILEMODE_BLOB || mode == GIT_FILEMODE_BLOB_EXECUTABLE) + type = Type::tRegular; + else if (mode == GIT_FILEMODE_LINK) + type = Type::tSymlink; + else if (mode == GIT_FILEMODE_COMMIT) + type = Type::tDirectory; // submodule // FIXME: add to cache - res.emplace(std::string(git_tree_entry_name(entry)), DirEntry{}); + res.emplace(std::string(git_tree_entry_name(entry)), type); } return res; diff --git a/src/libfetchers/include/nix/fetchers/git-utils.hh b/src/libfetchers/include/nix/fetchers/git-utils.hh index eada8745c3e..ca6ddd39f5f 100644 --- a/src/libfetchers/include/nix/fetchers/git-utils.hh +++ b/src/libfetchers/include/nix/fetchers/git-utils.hh @@ -40,6 +40,11 @@ struct GitRepo bool create = false; bool bare = false; bool packfilesOnly = false; + /** + * Open only the object database, bypassing full repository validation. + * Useful for repos with unsupported extensions (e.g., reftables). + */ + bool odbOnly = false; }; static ref openRepo(const std::filesystem::path & path, Options options); @@ -104,6 +109,18 @@ struct GitRepo virtual bool hasObject(const Hash & oid) = 0; + /** Get the SHA of a subtree entry within a tree object */ + virtual Hash getSubtreeSha(const Hash & treeSha, const std::string & entryName) = 0; + + /** Get the root tree SHA from a commit SHA */ + virtual Hash getCommitTree(const Hash & commitSha) = 0; + + /** + * Get list of dirty files by comparing a commit's tree against a workdir. + * Works with ODB-only repos (no refs needed). 
+ */ + virtual std::set getDirtyFilesAgainstTree(const Hash & commitSha, const std::filesystem::path & workdirPath) = 0; + virtual ref getAccessor(const Hash & rev, const GitAccessorOptions & options, std::string displayPrefix) = 0; From 06585f4eeaf442c230e130aae65a0b139144809b Mon Sep 17 00:00:00 2001 From: Burke Libbey Date: Mon, 22 Dec 2025 15:31:52 -0500 Subject: [PATCH 2/6] Integrate tectonix zones with lazy-trees infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When lazy-trees is enabled, zone sources are mounted lazily using GitSourceAccessor and only copied to the store when used as derivation inputs (devirtualized). Key changes: - Add tectonixZoneCache_ for tree SHA -> store path deduplication - Add getZoneStorePath() orchestrating lazy vs eager zone fetching - Add mountZoneByTreeSha() for lazy mounting clean zones - Add getZoneFromCheckout() as extension point for dirty zones (eager) - Add getOrMountWorldRoot() for read-only world access New builtins: - worldZone: Returns { outPath, root, treeSha, zonePath, dirty } - outPath: string with context for derivation src - root: path for reading files without devirtualization - worldRoot: Returns path to world root for read-only eval access Simplify __unsafeTectonixInternalZoneSrc to use getZoneStorePath(). 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- LAZY_TREES_PLAN.md | 565 +++++++++++++++++++++++++++ src/libexpr/eval.cc | 166 +++++++- src/libexpr/include/nix/expr/eval.hh | 38 ++ src/libexpr/primops/tectonix.cc | 157 +++++--- 4 files changed, 875 insertions(+), 51 deletions(-) create mode 100644 LAZY_TREES_PLAN.md diff --git a/LAZY_TREES_PLAN.md b/LAZY_TREES_PLAN.md new file mode 100644 index 00000000000..0bc846bad16 --- /dev/null +++ b/LAZY_TREES_PLAN.md @@ -0,0 +1,565 @@ +# Tectonix Lazy Trees Integration Plan + +This document outlines the plan to integrate tectonix zone access with Nix's lazy-trees infrastructure, enabling on-demand copying of zone sources to the store. + +## Background + +### Current Behavior + +When `builtins.unsafeTectonixInternalZoneSrc "//areas/tools/tec"` is called, the entire zone is immediately copied to the Nix store via `fetchToStore()`, regardless of whether the zone content is actually needed for a derivation. + +### Lazy Trees in Flakes + +With `lazy-trees = true`, flakes avoid this eager copying: + +1. `mountInput()` creates a random store path and mounts a `GitSourceAccessor` at that path +2. Files are read on-demand from the git ODB during evaluation +3. Only when the path is used as a derivation input does `devirtualize()` copy it to the store + +### Goal + +Apply the same lazy behavior to tectonix zones, while respecting zone boundaries and dirty zone detection. 
+ +--- + +## Architectural Comparison: Flakes vs Tectonix + +### Flakes + +``` +FlakeRef (github:nixos/nixpkgs/abc123) + │ + ▼ +InputCache.getAccessor() + │ + ▼ +Input.getAccessor() → GitSourceAccessor (lazy) + │ + ▼ +mountInput() + │ + ├─► lazyTrees=false: fetchToStore() immediately + │ + └─► lazyTrees=true: + StorePath::random("nixpkgs") + storeFS->mount(storePath, accessor) + return virtual path + +Later, when used in derivation: + │ + ▼ +devirtualize() → fetchToStore() → real store path +``` + +**Key point:** Each flake is its own unit. The accessor is rooted at the flake, and the whole flake gets mounted at one store path. + +### Tectonix Challenge + +``` +world @ sha:abc123 +├── areas/ +│ ├── tools/ +│ │ ├── tec/ ← Zone (tree: deadbeef) +│ │ ├── dev/ ← Zone (tree: cafebabe) +│ │ └── ... +│ └── platform/ +│ └── ... +└── .meta/ + └── manifest.json + +Problem: Can't mount whole world at one path! + +Using /nix/store/xxx-world/areas/tools/tec as derivation src +would pull in the ENTIRE world when devirtualized. + +Solution: Mount each zone separately at its own store path. +``` + +### What Makes Tectonix Harder + +1. **Granularity mismatch**: Flakes = one input = one mount. World = one repo = thousands of zones. +2. **No `Input` abstraction**: Flakes have `fetchers::Input` with `getAccessor()`, caching, locking. Tectonix builtins are ad-hoc. +3. **Dirty zone complexity**: Flakes mark dirty inputs as "unlocked". Tectonix needs zone-granular dirty detection with checkout fallback. +4. **Two-mode operation**: Git ODB vs checkout. Flakes only have one source per input. + +### What Makes Tectonix Easier + +1. **Content-addressed by nature**: Tree SHA is the *perfect* cache key. Same tree SHA across different world commits = identical content. +2. **No resolution complexity**: No registries, no indirect references, no lock file management. +3. **Already have the accessor**: `getWorldGitAccessor()` returns a lazy `GitSourceAccessor`. +4. 
**Single source of truth**: One repo, one commit SHA. + +--- + +## Design + +### Core Concept: Zone Mounts by Tree SHA + +``` +builtins.worldZone "//areas/tools/tec" + │ + ▼ +getZoneStorePath(zonePath) + │ + ├─► isDirty? ─────────────────────────────┐ + │ │ │ + │ ▼ │ + │ getZoneFromCheckout() │ + │ (EXTENSION POINT: eager for now) │ + │ │ │ + │ ▼ │ + │ return store path ◄────────────────────┘ + │ + └─► !isDirty + │ + ▼ + treeSha = getWorldTreeSha(zonePath) + │ + ▼ + mountZoneByTreeSha(treeSha) + │ + ├─► cached? return cached store path + │ + └─► not cached: + accessor = repo->getAccessor(treeSha) + storePath = StorePath::random(name) + storeFS->mount(storePath, accessor) + cache[treeSha] = storePath + return storePath +``` + +### Why Tree SHA as Cache Key + +``` +World @ v1 (sha: aaa) World @ v2 (sha: bbb) +├── areas/tools/tec ├── areas/tools/tec +│ (tree: deadbeef) ─────────────│ (tree: deadbeef) ← SAME! +│ │ +├── areas/tools/dev ├── areas/tools/dev +│ (tree: cafebabe) │ (tree: 12345678) ← Changed +``` + +If `//areas/tools/tec` didn't change between commits, its tree SHA is identical. The zone cache returns the same virtual store path, and when devirtualized, the same real store path. **Natural deduplication across world revisions.** + +--- + +## Implementation + +### Phase 1: Core Infrastructure + +#### 1.1 EvalState Additions (`src/libexpr/include/nix/expr/eval.hh`) + +```cpp +// In EvalState class: + +private: + /** + * Cache tree SHA → virtual store path for lazy zone mounts. + * Thread-safe for eval-cores > 1. + */ + Sync> tectonixZoneCache_; + +public: + /** + * Get a zone's store path, handling dirty detection and lazy mounting. 
+ * + * For clean zones with lazy-trees enabled: mounts accessor lazily + * For dirty zones: currently eager-copies from checkout (extension point) + * For lazy-trees disabled: eager-copies from git + */ + StorePath getZoneStorePath(std::string_view zonePath); + +private: + /** + * Mount a zone by tree SHA, returning a (potentially virtual) store path. + * Caches by tree SHA for deduplication across world revisions. + */ + StorePath mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath); + + /** + * Get zone store path from checkout (for dirty zones). + * EXTENSION POINT: Currently always eager. Could be made lazy later. + */ + StorePath getZoneFromCheckout(std::string_view zonePath); +``` + +#### 1.2 Implementation (`src/libexpr/eval.cc`) + +```cpp +StorePath EvalState::getZoneStorePath(std::string_view zonePath) +{ + // Normalize path + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + // Check dirty status + bool isDirty = false; + if (isTectonixSourceAvailable()) { + auto & dirtyZones = getTectonixDirtyZones(); + auto it = dirtyZones.find(std::string(zonePath)); + isDirty = it != dirtyZones.end() && it->second; + } + + if (isDirty) { + // EXTENSION POINT: For now, always eager from checkout + return getZoneFromCheckout(zonePath); + } + + // Clean zone: get tree SHA + auto treeSha = getWorldTreeSha(zonePath); + + if (!settings.lazyTrees) { + // Eager mode: immediate copy from git ODB + auto repo = getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto accessor = repo->getAccessor(treeSha, opts, "zone"); + + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + auto storePath = fetchToStore( + fetchSettings, *store, + SourcePath(accessor, CanonPath::root), + FetchMode::Copy, name); + + allowPath(storePath); + return storePath; + } + + // Lazy mode: mount by tree SHA + return mountZoneByTreeSha(treeSha, zonePath); +} + +StorePath EvalState::mountZoneByTreeSha(const Hash 
& treeSha, std::string_view zonePath) +{ + // Check cache first (thread-safe) + { + auto cache = tectonixZoneCache_.readLock(); + auto it = cache->find(treeSha); + if (it != cache->end()) { + debug("zone cache hit for tree %s", treeSha.gitRev()); + return it->second; + } + } + + // Not cached: create accessor and mount + auto repo = getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto accessor = repo->getAccessor(treeSha, opts, "zone"); + + // Generate name from zone path + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + + // Create virtual store path + auto storePath = StorePath::random(name); + allowPath(storePath); + + // Mount accessor at this path + storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); + + // Cache it (thread-safe) + { + auto cache = tectonixZoneCache_.lock(); + auto [it, inserted] = cache->try_emplace(treeSha, storePath); + if (!inserted) { + // Another thread beat us, use their path + return it->second; + } + } + + debug("mounted zone %s (tree %s) at %s", + zonePath, treeSha.gitRev(), store->printStorePath(storePath)); + + return storePath; +} + +StorePath EvalState::getZoneFromCheckout(std::string_view zonePath) +{ + // EXTENSION POINT: Currently always eager. + // + // To make this lazy later, we'd need to: + // 1. Create a filtered accessor over the checkout path + // 2. Compute a content key (hash of modified files? mtime-based?) + // 3. Cache and mount like mountZoneByTreeSha + // + // For now: just copy from checkout. 
+ + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + auto checkoutAccessor = getWorldCheckoutAccessor(); + if (!checkoutAccessor) + throw Error("checkout accessor not available for dirty zone '%s'", zonePath); + + auto checkoutPath = settings.tectonixCheckoutPath.get(); + auto fullPath = CanonPath(checkoutPath + "/" + zone); + + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + + auto storePath = fetchToStore( + fetchSettings, *store, + SourcePath(*checkoutAccessor, fullPath), + FetchMode::Copy, name); + + allowPath(storePath); + return storePath; +} +``` + +### Phase 2: Updated Builtins (`src/libexpr/primops/tectonix.cc`) + +#### 2.1 Simplify `prim_unsafeTectonixInternalZoneSrc` + +```cpp +static void prim_unsafeTectonixInternalZoneSrc(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'zonePath' argument to builtins.unsafeTectonixInternalZoneSrc"); + + auto storePath = state.getZoneStorePath(zonePath); + state.allowAndSetStorePathString(storePath, v); +} +``` + +#### 2.2 New `prim_worldZone` (flake-like interface) + +```cpp +static void prim_worldZone(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'zonePath' argument to builtins.worldZone"); + + // Get tree SHA before we potentially fetch + auto treeSha = state.getWorldTreeSha(zonePath); + + // Check dirty status + bool isDirty = false; + if (state.isTectonixSourceAvailable()) { + auto & dirtyZones = state.getTectonixDirtyZones(); + auto it = dirtyZones.find(std::string(zonePath)); + isDirty = it != dirtyZones.end() && it->second; + } + + auto storePath = state.getZoneStorePath(zonePath); + auto storePathStr = state.store->printStorePath(storePath); + + // Build result attrset (like fetchTree) + auto attrs = state.buildBindings(4); + + 
attrs.alloc("outPath").mkString(storePathStr, { + NixStringContextElem::Opaque{storePath} + }); + attrs.alloc("treeSha").mkString(treeSha.gitRev(), state.mem); + attrs.alloc("zonePath").mkString(zonePath, state.mem); + attrs.alloc("dirty").mkBool(isDirty); + + v.mkAttrs(attrs); +} + +static RegisterPrimOp primop_worldZone({ + .name = "worldZone", + .args = {"zonePath"}, + .doc = R"( + Get a zone from the world repository. + + Returns an attrset with: + - outPath: Store path containing zone source (lazy with lazy-trees) + - treeSha: Git tree SHA for this zone + - zonePath: The zone path argument + - dirty: Whether the zone has uncommitted changes + + Example: `builtins.worldZone "//areas/tools/tec"` + + Requires `--tectonix-git-dir` and `--tectonix-sha` to be set. + )", + .fun = prim_worldZone, +}); +``` + +#### 2.3 New `prim_worldRoot` (read-only world access) + +```cpp +static void prim_worldRoot(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + // Lazily mount the whole world accessor once per evaluation + auto storePath = state.getOrMountWorldRoot(); + + v.mkPath(state.rootPath( + CanonPath(state.store->printStorePath(storePath)))); +} + +static RegisterPrimOp primop_worldRoot({ + .name = "worldRoot", + .args = {}, + .doc = R"( + Get a path to the world repository root. + + This path can be used for reading files during evaluation: + + let world = builtins.worldRoot; + in import (world + "/areas/tools/tec/zone.nix") + + WARNING: Do not use this path directly as a derivation src! + That would copy the entire world to the store. Use + builtins.worldZone for derivation sources. + + Requires `--tectonix-git-dir` and `--tectonix-sha` to be set. 
+ )", + .fun = prim_worldRoot, +}); +``` + +With supporting method in `EvalState`: + +```cpp +StorePath EvalState::getOrMountWorldRoot() +{ + // Thread-safe lazy initialization + static std::once_flag mounted; + static StorePath worldStorePath; + + std::call_once(mounted, [this]() { + auto accessor = getWorldGitAccessor(); + worldStorePath = StorePath::random("world"); + allowPath(worldStorePath); + storeFS->mount( + CanonPath(store->printStorePath(worldStorePath)), + accessor); + }); + + return worldStorePath; +} +``` + +--- + +## Usage Examples + +### Before (eager) + +```nix +let + zoneSrc = builtins.unsafeTectonixInternalZoneSrc "//areas/tools/tec"; + # ^ Entire zone copied to store immediately +in +mkDerivation { + src = zoneSrc; + ... +} +``` + +### After (lazy) + +```nix +let + world = builtins.worldRoot; + + # Read-only access (no store copy during evaluation) + zoneNix = import (world + "/areas/tools/tec/zone.nix"); + manifest = builtins.fromJSON (builtins.readFile (world + "/.meta/manifest.json")); + + # For derivation src, use worldZone (zone-granular lazy copy) + tecZone = builtins.worldZone "//areas/tools/tec"; +in +mkDerivation { + src = tecZone.outPath; # Only copied when derivation is built + ... +} +``` + +--- + +## Builtin Migration Guide + +| Old Pattern | New Pattern | +|-------------|-------------| +| `__unsafeTectonixInternalZoneSrc path` | `(worldZone path).outPath` | +| `__unsafeTectonixInternalTreeSha path` then `__unsafeTectonixInternalTree sha` | `(worldZone path).outPath` | +| `__unsafeTectonixInternalFile path` | `builtins.readFile (worldRoot + path)` | +| `__unsafeTectonixInternalDir zone subpath` | `builtins.readDir (worldRoot + zone + "/" + subpath)` | + +The `__unsafeTectonixInternalTree` builtin can be retained for edge cases (fetching arbitrary tree SHAs not corresponding to zones), but becomes less central. 
+ +--- + +## Extension Point: Lazy Dirty Zones + +The `getZoneFromCheckout()` function is the clear extension point for future optimization. + +### Current Behavior + +Dirty zones are always eagerly copied from checkout: + +```cpp +StorePath EvalState::getZoneFromCheckout(std::string_view zonePath) +{ + // Always eager for now + return fetchToStore(...); +} +``` + +### Future Options + +1. **Content-hash dirty files** + - Walk checkout, hash modified files + - Use combined hash as cache key + - Complex but accurate + +2. **Overlay accessor** + - Base: git ODB accessor for zone + - Overlay: checkout accessor filtered to dirty files + - Mount the composite accessor + - Cache key: `(treeSha, set of dirty file paths)` + +3. **Mtime-based caching** + - Use checkout accessor with mtime as cache key + - Simpler but may re-copy on unrelated file touches + +The interface is clean: `getZoneStorePath()` decides dirty vs clean and delegates appropriately. The dirty path can be made lazy without changing callers. + +--- + +## Testing Plan + +1. **Lazy-trees enabled, clean zone** + - Verify virtual store path is created + - Verify no immediate copy to store + - Verify devirtualization on derivation build + +2. **Lazy-trees disabled** + - Verify immediate copy (current behavior preserved) + +3. **Dirty zones** + - Verify fallback to checkout + - Verify eager copy (for now) + +4. **Cache behavior** + - Same tree SHA returns same virtual path + - Different tree SHA returns different path + - Thread-safe with `eval-cores > 1` + +5. 
**Cross-world-revision deduplication** + - Zone unchanged between commits → same devirtualized store path + +--- + +## Summary + +| Component | Purpose | +|-----------|---------| +| `tectonixZoneCache_` | Tree SHA → virtual store path mapping | +| `getZoneStorePath()` | Orchestrator: dirty detection → dispatch | +| `mountZoneByTreeSha()` | Lazy mount for clean zones | +| `getZoneFromCheckout()` | Eager (for now) for dirty zones - **extension point** | +| `worldZone` | High-level builtin returning attrset | +| `worldRoot` | Read-only world access path | + +This design: +- Integrates cleanly with existing lazy-trees infrastructure +- Uses tree SHA for natural content-addressed caching +- Leaves clear extension point for dirty zone optimization +- Provides flake-like API consistency +- Enables `worldRoot` for ergonomic read-only access diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 218ff0db549..2b67a67c859 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -26,6 +26,7 @@ #include "nix/fetchers/input-cache.hh" #include "nix/fetchers/git-utils.hh" #include "nix/util/current-process.hh" +#include "nix/util/processes.hh" #include "nix/store/async-path-writer.hh" #include "nix/expr/parallel-eval.hh" @@ -435,7 +436,7 @@ ref EvalState::getWorldRepo() const if (hasPrefix(gitDir, "~/")) gitDir = getHome() + gitDir.substr(1); - worldRepo = GitRepo::openRepo(std::filesystem::path(gitDir), {.bare = true}); + worldRepo = GitRepo::openRepo(std::filesystem::path(gitDir), {.bare = true, .odbOnly = true}); } return *worldRepo; } @@ -625,12 +626,14 @@ const std::map & EvalState::getTectonixDirtyZones() const dirtyZones[zonePath] = false; } - // Get dirty files by diffing tree against workdir (works with ODB-only repo) + // Get dirty files via git status (avoids libgit2 reftables issue) auto checkoutPath = settings.tectonixCheckoutPath.get(); - auto dirtyFiles = repo->getDirtyFilesAgainstTree(hash, checkoutPath); + auto gitStatusOutput = runProgram("git", true, 
{"-C", checkoutPath, "status", "--porcelain"}); - for (const auto & dirtyFile : dirtyFiles) { - auto filePath = dirtyFile.abs(); + for (auto & line : tokenizeString>(gitStatusOutput, "\n")) { + if (line.size() < 4) continue; // Skip empty/malformed lines + // Format: "XY filename" where XY is 2-char status + auto filePath = "/" + std::string(line.substr(3)); // Find which zone this file belongs to for (auto & [zonePath, dirty] : dirtyZones) { @@ -655,6 +658,159 @@ const std::map & EvalState::getTectonixDirtyZones() const return *tectonixDirtyZones; } +StorePath EvalState::getZoneStorePath(std::string_view zonePath) +{ + // Normalized form (leading "//" stripped); in this function it is only used to build the store path name + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + // Dirty status is looked up by the zone path exactly as passed in (not the normalized form) + bool isDirty = false; + if (isTectonixSourceAvailable()) { + auto & dirtyZones = getTectonixDirtyZones(); + auto it = dirtyZones.find(std::string(zonePath)); + isDirty = it != dirtyZones.end() && it->second; + } + + if (isDirty) { + // EXTENSION POINT: For now, always eager from checkout + return getZoneFromCheckout(zonePath); + } + + // Clean zone: get tree SHA + auto treeSha = getWorldTreeSha(zonePath); + + if (!settings.lazyTrees) { + // Eager mode: immediate copy from git ODB + auto repo = getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto accessor = repo->getAccessor(treeSha, opts, "zone"); + + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + auto storePath = fetchToStore( + fetchSettings, *store, + SourcePath(accessor, CanonPath::root), + FetchMode::Copy, name); + + allowPath(storePath); + return storePath; + } + + // Lazy mode: mount by tree SHA + return mountZoneByTreeSha(treeSha, zonePath); +} + +StorePath EvalState::mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath) +{ + // Check cache first (thread-safe) + { + auto cache = tectonixZoneCache_.readLock(); + auto it = cache->find(treeSha); + if (it != cache->end()) { + debug("zone cache hit 
for tree %s", treeSha.gitRev()); + return it->second; + } + } + + // Not cached: create accessor and mount + auto repo = getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto accessor = repo->getAccessor(treeSha, opts, "zone"); + + // Generate name from zone path + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + + // Create virtual store path + auto storePath = StorePath::random(name); + allowPath(storePath); + + // Mount accessor at this path + storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); + + // Publish in the cache; the mount above happened without holding the cache lock + { + auto cache = tectonixZoneCache_.lock(); + auto [it, inserted] = cache->try_emplace(treeSha, storePath); + if (!inserted) { + // Another thread beat us, use their path (the mount we just created stays registered but is not returned) + return it->second; + } + } + + debug("mounted zone %s (tree %s) at %s", + zonePath, treeSha.gitRev(), store->printStorePath(storePath)); + + return storePath; +} + +StorePath EvalState::getZoneFromCheckout(std::string_view zonePath) +{ + // EXTENSION POINT: Currently always eager. + // + // To make this lazy later, we'd need to: + // 1. Create a filtered accessor over the checkout path + // 2. Compute a content key (hash of modified files? mtime-based?) + // 3. Cache and mount like mountZoneByTreeSha + // + // For now: just copy from checkout. 
+ + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + auto checkoutAccessor = getWorldCheckoutAccessor(); + if (!checkoutAccessor) + throw Error("checkout accessor not available for dirty zone '%s'", zonePath); + + auto checkoutPath = settings.tectonixCheckoutPath.get(); + auto fullPath = CanonPath(checkoutPath + "/" + zone); + + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + + auto storePath = fetchToStore( + fetchSettings, *store, + SourcePath(*checkoutAccessor, fullPath), + FetchMode::Copy, name); + + allowPath(storePath); + return storePath; +} + +StorePath EvalState::getOrMountWorldRoot() +{ + // Fast path: return the cached store path if the world root was already mounted + { + auto cached = worldRootStorePath_.readLock(); + if (*cached) + return **cached; + } + + // Not mounted: create accessor and mount (done without holding the lock) + auto accessor = getWorldGitAccessor(); + auto storePath = StorePath::random("world"); + allowPath(storePath); + + storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); + + // Publish the path; re-check because another thread may have mounted meanwhile + { + auto cache = worldRootStorePath_.lock(); + if (!*cache) { + *cache = storePath; + } else { + // Another thread beat us, use their path (the mount created above stays registered but is not returned) + return **cache; + } + } + + debug("mounted world root at %s", store->printStorePath(storePath)); + return storePath; +} + inline static bool isJustSchemePrefix(std::string_view prefix) { return !prefix.empty() && prefix[prefix.size() - 1] == ':' diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index 8880499a03b..16c4b5e480a 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -533,6 +533,29 @@ private: /** Lazy-initialized map of zone path → dirty status (only for sparse-checked-out zones) */ mutable std::optional> tectonixDirtyZones; + /** + * Cache tree SHA → virtual store path for lazy zone mounts. + * Thread-safe for eval-cores > 1. 
+ */ + mutable SharedSync> tectonixZoneCache_; + + /** + * Lazily-mounted world root store path (for worldRoot builtin). + */ + mutable SharedSync> worldRootStorePath_; + + /** + * Mount a zone by tree SHA, returning a (potentially virtual) store path. + * Caches by tree SHA for deduplication across world revisions. + */ + StorePath mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath); + + /** + * Get zone store path from checkout (for dirty zones). + * EXTENSION POINT: Currently always eager. Could be made lazy later. + */ + StorePath getZoneFromCheckout(std::string_view zonePath); + public: /** @@ -585,6 +608,21 @@ public: /** Get map of zone path → dirty status (only for sparse-checked-out zones) */ const std::map & getTectonixDirtyZones() const; + /** + * Get a zone's store path, handling dirty detection and lazy mounting. + * + * For clean zones with lazy-trees enabled: mounts accessor lazily + * For dirty zones: currently eager-copies from checkout (extension point) + * For lazy-trees disabled: eager-copies from git + */ + StorePath getZoneStorePath(std::string_view zonePath); + + /** + * Get or mount the world root for read-only access. + * Used by the worldRoot builtin. + */ + StorePath getOrMountWorldRoot(); + /** * Return a `SourcePath` that refers to `path` in the root * filesystem. diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc index 29025ad0be3..db7c7bec61d 100644 --- a/src/libexpr/primops/tectonix.cc +++ b/src/libexpr/primops/tectonix.cc @@ -279,56 +279,16 @@ static RegisterPrimOp primop_worldZoneFile({ // ============================================================================ // builtins.unsafeTectonixInternalZoneSrc zonePath // Returns a store path containing the zone source +// With lazy-trees enabled, returns a virtual store path that is only +// materialized when used as a derivation input. 
// ============================================================================ static void prim_unsafeTectonixInternalZoneSrc(EvalState & state, const PosIdx pos, Value ** args, Value & v) { auto zonePath = state.forceStringNoCtx(*args[0], pos, "while evaluating the 'zonePath' argument to builtins.unsafeTectonixInternalZoneSrc"); - // Normalize zone path - std::string zone(zonePath); - if (hasPrefix(zone, "//")) - zone = zone.substr(2); - - auto fullPath = CanonPath("/" + zone); - std::string name = "zone-" + replaceStrings(zone, "/", "-"); - - // In source-available mode with dirty zone, use checkout - auto & dirtyZones = state.getTectonixDirtyZones(); - auto it = dirtyZones.find(std::string(zonePath)); - bool isDirty = it != dirtyZones.end() && it->second; - if (state.isTectonixSourceAvailable() && isDirty) { - auto checkoutAccessor = state.getWorldCheckoutAccessor(); - if (!checkoutAccessor) - state.error("checkout accessor not available").atPos(pos).debugThrow(); - - auto checkoutPath = state.settings.tectonixCheckoutPath.get(); - auto checkoutFullPath = CanonPath(checkoutPath + fullPath.abs()); - - auto storePath = fetchToStore( - state.fetchSettings, - *state.store, - SourcePath(*checkoutAccessor, checkoutFullPath), - FetchMode::Copy, - name); - - state.allowAndSetStorePathString(storePath, v); - } else { - // Use git content - auto treeSha = state.getWorldTreeSha(zonePath); - auto repo = state.getWorldRepo(); - GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; - auto accessor = repo->getAccessor(treeSha, opts, "world-zone"); - - auto storePath = fetchToStore( - state.fetchSettings, - *state.store, - SourcePath(accessor, CanonPath::root), - FetchMode::Copy, - name); - - state.allowAndSetStorePathString(storePath, v); - } + auto storePath = state.getZoneStorePath(zonePath); + state.allowAndSetStorePathString(storePath, v); } static RegisterPrimOp primop_unsafeTectonixInternalZoneSrc({ @@ -337,8 +297,11 @@ static RegisterPrimOp 
primop_unsafeTectonixInternalZoneSrc({ .doc = R"( Get the source of a zone as a store path. - In source-available mode with uncommitted changes, uses checkout content. - Otherwise uses git content. + With `lazy-trees = true`, returns a virtual store path that is only + materialized when used as a derivation input (devirtualized). + + In source-available mode with uncommitted changes, uses checkout content + (always eager for dirty zones). Example: `builtins.unsafeTectonixInternalZoneSrc "//areas/tools/tec"` @@ -490,4 +453,106 @@ static RegisterPrimOp primop_unsafeTectonixInternalDirtyZones({ .fun = prim_unsafeTectonixInternalDirtyZones, }); +// ============================================================================ +// builtins.worldZone zonePath +// Returns an attrset with zone info (flake-like interface) +// ============================================================================ +static void prim_worldZone(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'zonePath' argument to builtins.worldZone"); + + // Get tree SHA before we potentially fetch + auto treeSha = state.getWorldTreeSha(zonePath); + + // Check dirty status + bool isDirty = false; + if (state.isTectonixSourceAvailable()) { + auto & dirtyZones = state.getTectonixDirtyZones(); + auto it = dirtyZones.find(std::string(zonePath)); + isDirty = it != dirtyZones.end() && it->second; + } + + auto storePath = state.getZoneStorePath(zonePath); + auto storePathStr = state.store->printStorePath(storePath); + + // Build result attrset (like fetchTree) + auto attrs = state.buildBindings(5); + + // outPath: string with context (for use as derivation src) + attrs.alloc("outPath").mkString(storePathStr, { + NixStringContextElem::Opaque{storePath} + }, state.mem); + + // root: path value (for reading files without devirtualization) + attrs.alloc("root").mkPath( + state.rootPath(CanonPath(storePathStr)), 
state.mem); + + attrs.alloc("treeSha").mkString(treeSha.gitRev(), state.mem); + attrs.alloc("zonePath").mkString(zonePath, state.mem); + attrs.alloc("dirty").mkBool(isDirty); + + v.mkAttrs(attrs); +} + +static RegisterPrimOp primop_worldZone({ + .name = "worldZone", + .args = {"zonePath"}, + .doc = R"( + Get a zone from the world repository. + + Returns an attrset with: + - outPath: Store path string with context (for use as derivation src) + - root: Path value for reading files (no devirtualization) + - treeSha: Git tree SHA for this zone + - zonePath: The zone path argument + - dirty: Whether the zone has uncommitted changes + + With `lazy-trees = true`, the zone is mounted lazily. Use `root` to + read files without triggering a copy to the store: + + let zone = builtins.worldZone "//areas/tools/tec"; + in import (zone.root + "/zone.nix") + + Use `outPath` as derivation src (triggers copy at build time): + + mkDerivation { src = zone.outPath; } + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. + )", + .fun = prim_worldZone, +}); + +// ============================================================================ +// builtins.worldRoot +// Returns a path to the world repository root for read-only access +// ============================================================================ +static void prim_worldRoot(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto storePath = state.getOrMountWorldRoot(); + + v.mkPath(state.rootPath( + CanonPath(state.store->printStorePath(storePath))), state.mem); +} + +static RegisterPrimOp primop_worldRoot({ + .name = "worldRoot", + .args = {}, + .doc = R"( + Get a path to the world repository root. + + This path can be used for reading files during evaluation: + + let world = builtins.worldRoot; + in import (world + "/areas/tools/tec/zone.nix") + + WARNING: Do not use this path directly as a derivation src! + That would copy the entire world to the store. 
Use + builtins.worldZone for derivation sources. + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. + )", + .fun = prim_worldRoot, +}); + } // namespace nix From 35c1561b91da15bcab87ac082e17c730874ef14e Mon Sep 17 00:00:00 2001 From: Burke Libbey Date: Mon, 22 Dec 2025 15:36:03 -0500 Subject: [PATCH 3/6] Remove redundant tectonix builtins, rename to internal API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove builtins now superseded by lazy-trees zone access: - __unsafeTectonixInternalFile: use builtins.readFile with __unsafeTectonixInternalRoot - worldZoneFile: use builtins.readFile (zone.root + "/path") - __unsafeTectonixInternalDir: use builtins.readDir (zone.root + "/path") Rename to consistent internal naming: - worldZone → __unsafeTectonixInternalZone - worldRoot → __unsafeTectonixInternalRoot 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/libexpr/primops/tectonix.cc | 218 ++------------------------------ 1 file changed, 14 insertions(+), 204 deletions(-) diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc index db7c7bec61d..fa58ecf8ba3 100644 --- a/src/libexpr/primops/tectonix.cc +++ b/src/libexpr/primops/tectonix.cc @@ -164,118 +164,6 @@ static RegisterPrimOp primop_unsafeTectonixInternalTree({ .fun = prim_unsafeTectonixInternalTree, }); -// ============================================================================ -// builtins.unsafeTectonixInternalFile path -// Returns file contents as a string -// ============================================================================ -static void prim_unsafeTectonixInternalFile(EvalState & state, const PosIdx pos, Value ** args, Value & v) -{ - auto worldPath = state.forceStringNoCtx(*args[0], pos, - "while evaluating the 'path' argument to builtins.unsafeTectonixInternalFile"); - - // Normalize path (remove leading //) - std::string path(worldPath); - if 
(hasPrefix(path, "//")) - path = path.substr(2); - - auto fullPath = CanonPath("/" + path); - - // In source-available mode, check checkout first - if (state.isTectonixSourceAvailable()) { - auto checkoutAccessor = state.getWorldCheckoutAccessor(); - if (checkoutAccessor) { - auto checkoutPath = state.settings.tectonixCheckoutPath.get(); - auto checkoutFullPath = CanonPath(checkoutPath + fullPath.abs()); - if ((*checkoutAccessor)->pathExists(checkoutFullPath)) { - auto content = (*checkoutAccessor)->readFile(checkoutFullPath); - v.mkString(content, state.mem); - return; - } - } - } - - // Fall back to git - auto accessor = state.getWorldGitAccessor(); - if (!accessor->pathExists(fullPath)) - state.error("path '%s' does not exist in world", fullPath) - .atPos(pos).debugThrow(); - - auto content = accessor->readFile(fullPath); - v.mkString(content, state.mem); -} - -static RegisterPrimOp primop_unsafeTectonixInternalFile({ - .name = "__unsafeTectonixInternalFile", - .args = {"path"}, - .doc = R"( - Read a file from the world repository. - - In source-available mode (--tectonix-checkout-path set), prefers checkout files. - - Example: `builtins.unsafeTectonixInternalFile "//areas/tools/tec/zone.nix"` - - Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. 
- )", - .fun = prim_unsafeTectonixInternalFile, -}); - -// ============================================================================ -// builtins.worldZoneFile zonePath pathInZone -// Returns file contents as a string -// ============================================================================ -static void prim_worldZoneFile(EvalState & state, const PosIdx pos, Value ** args, Value & v) -{ - auto zonePath = state.forceStringNoCtx(*args[0], pos, - "while evaluating the 'zonePath' argument to builtins.worldZoneFile"); - auto pathInZone = state.forceStringNoCtx(*args[1], pos, - "while evaluating the 'pathInZone' argument to builtins.worldZoneFile"); - - // Normalize zone path (remove leading //) - std::string zone(zonePath); - if (hasPrefix(zone, "//")) - zone = zone.substr(2); - - auto fullPath = CanonPath("/" + zone + "/" + std::string(pathInZone)); - - // In source-available mode, check checkout first - if (state.isTectonixSourceAvailable()) { - auto checkoutAccessor = state.getWorldCheckoutAccessor(); - if (checkoutAccessor) { - auto checkoutPath = state.settings.tectonixCheckoutPath.get(); - auto checkoutFullPath = CanonPath(checkoutPath + fullPath.abs()); - if ((*checkoutAccessor)->pathExists(checkoutFullPath)) { - auto content = (*checkoutAccessor)->readFile(checkoutFullPath); - v.mkString(content, state.mem); - return; - } - } - } - - // Fall back to git - auto accessor = state.getWorldGitAccessor(); - if (!accessor->pathExists(fullPath)) - state.error("path '%s' does not exist in world", fullPath) - .atPos(pos).debugThrow(); - - auto content = accessor->readFile(fullPath); - v.mkString(content, state.mem); -} - -static RegisterPrimOp primop_worldZoneFile({ - .name = "worldZoneFile", - .args = {"zonePath", "pathInZone"}, - .doc = R"( - Read a file from a zone in the world repository. - - In source-available mode (--tectonix-checkout-path set), prefers checkout files. 
- - Example: `builtins.worldZoneFile "//areas/tools/tec" "zone.nix"` - - Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. - )", - .fun = prim_worldZoneFile, -}); - // ============================================================================ // builtins.unsafeTectonixInternalZoneSrc zonePath // Returns a store path containing the zone source @@ -310,84 +198,6 @@ static RegisterPrimOp primop_unsafeTectonixInternalZoneSrc({ .fun = prim_unsafeTectonixInternalZoneSrc, }); -// ============================================================================ -// builtins.unsafeTectonixInternalDir zonePath pathInZone -// Returns directory listing as attrset -// ============================================================================ -static void prim_unsafeTectonixInternalDir(EvalState & state, const PosIdx pos, Value ** args, Value & v) -{ - auto zonePath = state.forceStringNoCtx(*args[0], pos, - "while evaluating the 'zonePath' argument to builtins.unsafeTectonixInternalDir"); - auto pathInZone = state.forceStringNoCtx(*args[1], pos, - "while evaluating the 'pathInZone' argument to builtins.unsafeTectonixInternalDir"); - - // Normalize path - std::string zone(zonePath); - if (hasPrefix(zone, "//")) - zone = zone.substr(2); - - auto fullPath = CanonPath("/" + zone + "/" + std::string(pathInZone)); - - // Determine which accessor to use - ref accessor = state.getWorldGitAccessor(); - CanonPath accessPath = fullPath; - - if (state.isTectonixSourceAvailable()) { - auto checkoutAccessor = state.getWorldCheckoutAccessor(); - if (checkoutAccessor) { - auto checkoutPath = state.settings.tectonixCheckoutPath.get(); - auto checkoutFullPath = CanonPath(checkoutPath + fullPath.abs()); - if ((*checkoutAccessor)->pathExists(checkoutFullPath)) { - accessor = *checkoutAccessor; - accessPath = checkoutFullPath; - } - } - } - - if (!accessor->pathExists(accessPath)) - state.error("path '%s' does not exist in world", fullPath) - .atPos(pos).debugThrow(); - - auto entries 
= accessor->readDirectory(accessPath); - - auto attrs = state.buildBindings(entries.size()); - for (auto & [name, typeOpt] : entries) { - const char * typeStr; - if (!typeOpt) { - typeStr = "unknown"; - } else { - switch (*typeOpt) { - case SourceAccessor::Type::tRegular: typeStr = "regular"; break; - case SourceAccessor::Type::tDirectory: typeStr = "directory"; break; - case SourceAccessor::Type::tSymlink: typeStr = "symlink"; break; - case SourceAccessor::Type::tChar: - case SourceAccessor::Type::tBlock: - case SourceAccessor::Type::tSocket: - case SourceAccessor::Type::tFifo: - case SourceAccessor::Type::tUnknown: - typeStr = "unknown"; break; - } - } - attrs.alloc(state.symbols.create(name)).mkString(typeStr, state.mem); - } - v.mkAttrs(attrs); -} - -static RegisterPrimOp primop_unsafeTectonixInternalDir({ - .name = "__unsafeTectonixInternalDir", - .args = {"zonePath", "pathInZone"}, - .doc = R"( - List directory contents from the world repository. - - Returns an attrset mapping names to types ("regular", "directory", "symlink"). - - Example: `builtins.unsafeTectonixInternalDir "//areas/tools/tec" "src"` - - Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. 
- )", - .fun = prim_unsafeTectonixInternalDir, -}); - // ============================================================================ // builtins.unsafeTectonixInternalSparseCheckoutRoots // Returns list of zone IDs in sparse checkout @@ -454,13 +264,13 @@ static RegisterPrimOp primop_unsafeTectonixInternalDirtyZones({ }); // ============================================================================ -// builtins.worldZone zonePath +// builtins.__unsafeTectonixInternalZone zonePath // Returns an attrset with zone info (flake-like interface) // ============================================================================ -static void prim_worldZone(EvalState & state, const PosIdx pos, Value ** args, Value & v) +static void prim_unsafeTectonixInternalZone(EvalState & state, const PosIdx pos, Value ** args, Value & v) { auto zonePath = state.forceStringNoCtx(*args[0], pos, - "while evaluating the 'zonePath' argument to builtins.worldZone"); + "while evaluating the 'zonePath' argument to builtins.__unsafeTectonixInternalZone"); // Get tree SHA before we potentially fetch auto treeSha = state.getWorldTreeSha(zonePath); @@ -495,8 +305,8 @@ static void prim_worldZone(EvalState & state, const PosIdx pos, Value ** args, V v.mkAttrs(attrs); } -static RegisterPrimOp primop_worldZone({ - .name = "worldZone", +static RegisterPrimOp primop_unsafeTectonixInternalZone({ + .name = "__unsafeTectonixInternalZone", .args = {"zonePath"}, .doc = R"( Get a zone from the world repository. @@ -511,7 +321,7 @@ static RegisterPrimOp primop_worldZone({ With `lazy-trees = true`, the zone is mounted lazily. 
Use `root` to read files without triggering a copy to the store: - let zone = builtins.worldZone "//areas/tools/tec"; + let zone = builtins.__unsafeTectonixInternalZone "//areas/tools/tec"; in import (zone.root + "/zone.nix") Use `outPath` as derivation src (triggers copy at build time): @@ -520,14 +330,14 @@ static RegisterPrimOp primop_worldZone({ Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. )", - .fun = prim_worldZone, + .fun = prim_unsafeTectonixInternalZone, }); // ============================================================================ -// builtins.worldRoot +// builtins.__unsafeTectonixInternalRoot // Returns a path to the world repository root for read-only access // ============================================================================ -static void prim_worldRoot(EvalState & state, const PosIdx pos, Value ** args, Value & v) +static void prim_unsafeTectonixInternalRoot(EvalState & state, const PosIdx pos, Value ** args, Value & v) { auto storePath = state.getOrMountWorldRoot(); @@ -535,24 +345,24 @@ static void prim_worldRoot(EvalState & state, const PosIdx pos, Value ** args, V CanonPath(state.store->printStorePath(storePath))), state.mem); } -static RegisterPrimOp primop_worldRoot({ - .name = "worldRoot", +static RegisterPrimOp primop_unsafeTectonixInternalRoot({ + .name = "__unsafeTectonixInternalRoot", .args = {}, .doc = R"( Get a path to the world repository root. This path can be used for reading files during evaluation: - let world = builtins.worldRoot; + let world = builtins.__unsafeTectonixInternalRoot; in import (world + "/areas/tools/tec/zone.nix") WARNING: Do not use this path directly as a derivation src! That would copy the entire world to the store. Use - builtins.worldZone for derivation sources. + builtins.__unsafeTectonixInternalZone for derivation sources. Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. 
)", - .fun = prim_worldRoot, + .fun = prim_unsafeTectonixInternalRoot, }); } // namespace nix From 0e70cf7e70c47add8c1dd5d0d37ec641dc63d442 Mon Sep 17 00:00:00 2001 From: Burke Libbey Date: Mon, 22 Dec 2025 16:17:18 -0500 Subject: [PATCH 4/6] Remove worldRoot builtin, validate zone paths are exact roots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove __unsafeTectonixInternalRoot (and getOrMountWorldRoot, worldRootStorePath_) - Add validation to __unsafeTectonixInternalZone to ensure the path exists in the manifest (is an exact zone root, not a prefix or subdir) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/libexpr/eval.cc | 31 ---------------------- src/libexpr/include/nix/expr/eval.hh | 11 -------- src/libexpr/primops/tectonix.cc | 39 +++++----------------------- 3 files changed, 7 insertions(+), 74 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 2b67a67c859..50c02128652 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -780,37 +780,6 @@ StorePath EvalState::getZoneFromCheckout(std::string_view zonePath) return storePath; } -StorePath EvalState::getOrMountWorldRoot() -{ - // Check if already mounted (thread-safe) - { - auto cached = worldRootStorePath_.readLock(); - if (*cached) - return **cached; - } - - // Not mounted: create accessor and mount - auto accessor = getWorldGitAccessor(); - auto storePath = StorePath::random("world"); - allowPath(storePath); - - storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); - - // Cache it (thread-safe) - { - auto cache = worldRootStorePath_.lock(); - if (!*cache) { - *cache = storePath; - } else { - // Another thread beat us, use their path - return **cache; - } - } - - debug("mounted world root at %s", store->printStorePath(storePath)); - return storePath; -} - inline static bool isJustSchemePrefix(std::string_view prefix) { return !prefix.empty() && 
prefix[prefix.size() - 1] == ':' diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index 16c4b5e480a..db77dd8fa3a 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -539,11 +539,6 @@ private: */ mutable SharedSync> tectonixZoneCache_; - /** - * Lazily-mounted world root store path (for worldRoot builtin). - */ - mutable SharedSync> worldRootStorePath_; - /** * Mount a zone by tree SHA, returning a (potentially virtual) store path. * Caches by tree SHA for deduplication across world revisions. @@ -617,12 +612,6 @@ public: */ StorePath getZoneStorePath(std::string_view zonePath); - /** - * Get or mount the world root for read-only access. - * Used by the worldRoot builtin. - */ - StorePath getOrMountWorldRoot(); - /** * Return a `SourcePath` that refers to `path` in the root * filesystem. diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc index fa58ecf8ba3..6f684b123b6 100644 --- a/src/libexpr/primops/tectonix.cc +++ b/src/libexpr/primops/tectonix.cc @@ -272,6 +272,13 @@ static void prim_unsafeTectonixInternalZone(EvalState & state, const PosIdx pos, auto zonePath = state.forceStringNoCtx(*args[0], pos, "while evaluating the 'zonePath' argument to builtins.__unsafeTectonixInternalZone"); + // Validate that zonePath is exactly a zone root (exists in manifest) + auto content = readManifestContent(state, pos); + auto manifest = nlohmann::json::parse(content); + if (!manifest.contains(std::string(zonePath))) + state.error("'%s' is not a zone root (must be an exact path from the manifest)", zonePath) + .atPos(pos).debugThrow(); + // Get tree SHA before we potentially fetch auto treeSha = state.getWorldTreeSha(zonePath); @@ -333,36 +340,4 @@ static RegisterPrimOp primop_unsafeTectonixInternalZone({ .fun = prim_unsafeTectonixInternalZone, }); -// ============================================================================ -// builtins.__unsafeTectonixInternalRoot 
-// Returns a path to the world repository root for read-only access -// ============================================================================ -static void prim_unsafeTectonixInternalRoot(EvalState & state, const PosIdx pos, Value ** args, Value & v) -{ - auto storePath = state.getOrMountWorldRoot(); - - v.mkPath(state.rootPath( - CanonPath(state.store->printStorePath(storePath))), state.mem); -} - -static RegisterPrimOp primop_unsafeTectonixInternalRoot({ - .name = "__unsafeTectonixInternalRoot", - .args = {}, - .doc = R"( - Get a path to the world repository root. - - This path can be used for reading files during evaluation: - - let world = builtins.__unsafeTectonixInternalRoot; - in import (world + "/areas/tools/tec/zone.nix") - - WARNING: Do not use this path directly as a derivation src! - That would copy the entire world to the store. Use - builtins.__unsafeTectonixInternalZone for derivation sources. - - Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. - )", - .fun = prim_unsafeTectonixInternalRoot, -}); - } // namespace nix From 852f6369886a1fe1505638891faa00394802b138 Mon Sep 17 00:00:00 2001 From: Burke Libbey Date: Mon, 22 Dec 2025 16:22:01 -0500 Subject: [PATCH 5/6] Implement lazy mounting for checkout (dirty) zones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With lazy-trees enabled, dirty zones are now mounted lazily using makeFSSourceAccessor rooted at the zone directory in the checkout. This avoids copying the entire zone to the store until it's actually used as a derivation input. 
- Add tectonixCheckoutZoneCache_ for caching mounted checkout zones - Update getZoneFromCheckout to mount lazily when lazy-trees enabled - Cache by zone path for the duration of evaluation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/libexpr/eval.cc | 64 +++++++++++++++++++--------- src/libexpr/include/nix/expr/eval.hh | 8 +++- 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 50c02128652..7180825e5b8 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -749,34 +749,60 @@ StorePath EvalState::mountZoneByTreeSha(const Hash & treeSha, std::string_view z StorePath EvalState::getZoneFromCheckout(std::string_view zonePath) { - // EXTENSION POINT: Currently always eager. - // - // To make this lazy later, we'd need to: - // 1. Create a filtered accessor over the checkout path - // 2. Compute a content key (hash of modified files? mtime-based?) - // 3. Cache and mount like mountZoneByTreeSha - // - // For now: just copy from checkout. 
- std::string zone(zonePath); if (hasPrefix(zone, "//")) zone = zone.substr(2); - auto checkoutAccessor = getWorldCheckoutAccessor(); - if (!checkoutAccessor) - throw Error("checkout accessor not available for dirty zone '%s'", zonePath); - + std::string name = "zone-" + replaceStrings(zone, "/", "-"); auto checkoutPath = settings.tectonixCheckoutPath.get(); - auto fullPath = CanonPath(checkoutPath + "/" + zone); + auto fullPath = std::filesystem::path(checkoutPath) / zone; - std::string name = "zone-" + replaceStrings(zone, "/", "-"); + if (!settings.lazyTrees) { + // Eager mode: immediate copy from checkout + auto checkoutAccessor = getWorldCheckoutAccessor(); + if (!checkoutAccessor) + throw Error("checkout accessor not available for dirty zone '%s'", zonePath); - auto storePath = fetchToStore( - fetchSettings, *store, - SourcePath(*checkoutAccessor, fullPath), - FetchMode::Copy, name); + auto storePath = fetchToStore( + fetchSettings, *store, + SourcePath(*checkoutAccessor, CanonPath(checkoutPath + "/" + zone)), + FetchMode::Copy, name); + allowPath(storePath); + return storePath; + } + + // Lazy mode: check cache first (thread-safe) + { + auto cache = tectonixCheckoutZoneCache_.readLock(); + auto it = cache->find(std::string(zonePath)); + if (it != cache->end()) { + debug("checkout zone cache hit for %s", zonePath); + return it->second; + } + } + + // Not cached: create accessor rooted at zone directory and mount + auto accessor = makeFSSourceAccessor(fullPath); + + // Create virtual store path + auto storePath = StorePath::random(name); allowPath(storePath); + + // Mount accessor at this path + storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); + + // Cache it (thread-safe) + { + auto cache = tectonixCheckoutZoneCache_.lock(); + auto [it, inserted] = cache->try_emplace(std::string(zonePath), storePath); + if (!inserted) { + // Another thread beat us, use their path + return it->second; + } + } + + debug("mounted checkout zone %s at %s", 
zonePath, store->printStorePath(storePath)); return storePath; } diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index db77dd8fa3a..93875abda2a 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -539,6 +539,12 @@ private: */ mutable SharedSync> tectonixZoneCache_; + /** + * Cache zone path → virtual store path for lazy checkout zone mounts. + * Thread-safe for eval-cores > 1. + */ + mutable SharedSync> tectonixCheckoutZoneCache_; + /** * Mount a zone by tree SHA, returning a (potentially virtual) store path. * Caches by tree SHA for deduplication across world revisions. @@ -547,7 +553,7 @@ private: /** * Get zone store path from checkout (for dirty zones). - * EXTENSION POINT: Currently always eager. Could be made lazy later. + * With lazy-trees enabled, mounts lazily and caches by zone path. */ StorePath getZoneFromCheckout(std::string_view zonePath); From 7e44b1f6995bc4a51ad39727e6b47e4e32e4c225 Mon Sep 17 00:00:00 2001 From: Burke Libbey Date: Fri, 9 Jan 2026 00:28:00 -0600 Subject: [PATCH 6/6] Implement nested (internal) zones for tectonix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit implements support for internal zones - zones that exist within other zones, providing encapsulation and modularity. Internal zones are hidden from their host zone's lazy-tree source (the `_internal` directory is filtered out), addressable as first-class zones via extended paths like `//a/b/c/_internal/d/e`, and recursively nestable (internal zones can have their own `_internal` with more zones). The implementation uses a "peel" operation to parse zone paths, recursive resolution for tree SHA computation, a `ZoneFilteringAccessor` that hides `_internal` directories at every level, and updated dirty zone detection that maps dirty files to the most specific zone. 
All accessor creation paths (lazy mount, eager copy, checkout mount) now wrap accessors with the zone filter to ensure hermetic isolation. # Nested Zones Design and Implementation Plan This document describes the design for **nested (internal) zones** - zones that exist within other zones, providing encapsulation and modularity. ## Overview Internal zones are: - **Hidden** from their host zone's lazy-tree source (as if `_internal` doesn't exist) - **Addressable** as first-class zones via extended paths like `//a/b/c/_internal/d/e` - **Recursively nestable** - internal zones can have their own `_internal` with more zones ### Constraints The `_internal` directory must contain precisely: 1. A `manifest.json` 2. Zone directories 3. No other files Internal zones are only readable from: - The enclosing zone - Co-internal cousins within that enclosing zone --- ## Zone Path Algebra ### Grammar ``` zone_path ::= top_level | internal top_level ::= "//" segments internal ::= zone_path "/_internal/" segments segments ::= name ("/" name)* ``` This grammar reveals the key insight: **an internal zone path is recursive** - the host of an internal zone can itself be an internal zone. 
### Examples | Path | Host | Internal Path | |------|------|---------------| | `//areas/tools/tec` | (root manifest) | — | | `//areas/tools/tec/_internal/helpers` | `//areas/tools/tec` | `helpers` | | `//areas/tools/tec/_internal/a/b/_internal/c` | `//areas/tools/tec/_internal/a/b` | `c` | ### The "Peel" Operation Every zone path can be **peeled** into at most one layer: ```cpp struct PeeledZonePath { std::optional<std::string> hostPath; // nullopt for top-level std::string localPath; // The path to look up in manifest }; PeeledZonePath peel(std::string_view path) { auto pos = path.rfind("/_internal/"); if (pos == std::string_view::npos) { return {.hostPath = std::nullopt, .localPath = std::string(path)}; } return { .hostPath = std::string(path.substr(0, pos)), .localPath = std::string(path.substr(pos + 11)) // skip "/_internal/" }; } ``` This is elegant because: - `peel("//a/b/c")` → `{nullopt, "//a/b/c"}` — top-level - `peel("//a/b/_internal/c")` → `{"//a/b", "c"}` — one level of nesting - `peel("//a/_internal/b/_internal/c")` → `{"//a/_internal/b", "c"}` — recursive host --- ## Resolution Algorithm ``` resolveZone(path): peeled = peel(path) if peeled.hostPath is null: # Top-level zone: use root manifest manifest = readRootManifest() assert peeled.localPath in manifest treeSha = computeTreeShaFromWorldRoot(peeled.localPath) return Zone(path, treeSha, manifest[peeled.localPath].id) # Internal zone: resolve host first (recursive!) hostZone = resolveZone(peeled.hostPath) # Read host's internal manifest internalManifest = readFile(hostZone.tree, "_internal/manifest.json") assert peeled.localPath in internalManifest # Compute tree SHA relative to host treeSha = getSubtreeSha(hostZone.treeSha, "_internal/" + peeled.localPath) return Zone(path, treeSha, internalManifest[peeled.localPath].id) ``` The beauty: **one algorithm handles arbitrary nesting depth** through recursion.
--- ## Source Filtering: The Disappearing `_internal` Every zone's source accessor must filter out `_internal` directories **at every level**: ```cpp class ZoneFilteringAccessor : public FilteringSourceAccessor { bool isAllowed(const CanonPath & path) override { // Check each path component for (auto it = path.begin(); it != path.end(); ++it) { if (*it == "_internal") return false; } return true; } }; ``` This means: - `//a/b/c` sees everything EXCEPT any `_internal` subdirectories - `//a/b/c/_internal/d` sees everything EXCEPT any `_internal` subdirectories within it - Each zone is hermetically sealed from its internal zones --- ## Manifest Structure **Root manifest** (`//.meta/manifest.json`): ```json { "//areas/tools/tec": {"id": "W-123456"}, "//areas/platform/core": {"id": "W-789abc"} } ``` **Internal manifest** (`//areas/tools/tec/_internal/manifest.json`): ```json { "helpers": {"id": "W-def000"}, "test-utils": {"id": "W-def001"}, "deeply/nested/thing": {"id": "W-def002"} } ``` Note: Internal manifest paths are **relative** (no `//` prefix). 
--- ## Implementation Plan ### Phase 1: Zone Path Parsing Infrastructure **File: `src/libexpr/primops/tectonix.cc`** ```cpp namespace { struct PeeledZonePath { std::optional hostPath; std::string localPath; bool isInternal() const { return hostPath.has_value(); } }; PeeledZonePath peelZonePath(std::string_view path) { auto pos = path.rfind("/_internal/"); if (pos == std::string_view::npos) { return {.hostPath = std::nullopt, .localPath = std::string(path)}; } return { .hostPath = std::string(path.substr(0, pos)), .localPath = std::string(path.substr(pos + 11)) }; } } // anonymous namespace ``` ### Phase 2: Internal Manifest Reading **Add to `src/libexpr/primops/tectonix.cc`:** ```cpp static std::optional readInternalManifest( EvalState & state, const Hash & hostTreeSha) { auto repo = state.getWorldRepo(); GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; auto accessor = repo->getAccessor(hostTreeSha, opts, "host"); auto manifestPath = CanonPath("_internal/manifest.json"); if (!accessor->pathExists(manifestPath)) return std::nullopt; return nlohmann::json::parse(accessor->readFile(manifestPath)); } ``` ### Phase 3: Recursive Tree SHA Computation **Modify `EvalState::getWorldTreeSha` in `src/libexpr/eval.cc`:** ```cpp Hash EvalState::getWorldTreeSha(std::string_view zonePath) const { auto peeled = peelZonePath(zonePath); if (!peeled.isInternal()) { // Existing top-level logic (unchanged) return computeTreeShaFromWorldRoot(peeled.localPath); } // Internal zone: recursive resolution auto hostTreeSha = getWorldTreeSha(*peeled.hostPath); auto repo = getWorldRepo(); // Navigate: hostTree -> _internal -> localPath auto internalTreeSha = repo->getSubtreeSha(hostTreeSha, "_internal"); // Walk through localPath segments for (auto & segment : tokenizeString>(peeled.localPath, "/")) { internalTreeSha = repo->getSubtreeSha(internalTreeSha, segment); } return internalTreeSha; } ``` ### Phase 4: Zone Filtering Accessor **Add to 
`src/libfetchers/filtering-source-accessor.cc` or inline:** ```cpp class ZoneFilteringAccessor : public FilteringSourceAccessor { public: ZoneFilteringAccessor(ref next) : FilteringSourceAccessor(std::move(next), makeNotAllowedError) {} private: static MakeNotAllowedError makeNotAllowedError(const CanonPath & path) { return RestrictedPathError( fmt("'%s' is hidden (inside _internal)", path)); } bool isAllowed(const CanonPath & path) override { for (auto it = path.begin(); it != path.end(); ++it) { if (*it == "_internal") return false; } return true; } }; ``` ### Phase 5: Updated Zone Resolution **Modify `prim_unsafeTectonixInternalZone` in `src/libexpr/primops/tectonix.cc`:** ```cpp static void prim_unsafeTectonixInternalZone(EvalState & state, const PosIdx pos, Value ** args, Value & v) { auto zonePath = state.forceStringNoCtx(*args[0], pos, "..."); auto peeled = peelZonePath(zonePath); // Validate zone exists in appropriate manifest if (!peeled.isInternal()) { // Top-level: check root manifest (existing logic) auto manifest = readRootManifest(state, pos); if (!manifest.contains(std::string(zonePath))) state.error("'%s' is not a zone", zonePath).atPos(pos).debugThrow(); } else { // Internal: resolve host, check its internal manifest auto hostTreeSha = state.getWorldTreeSha(*peeled.hostPath); auto internalManifest = readInternalManifest(state, hostTreeSha); if (!internalManifest) state.error("zone '%s' has no internal manifest", *peeled.hostPath) .atPos(pos).debugThrow(); if (!internalManifest->contains(peeled.localPath)) state.error("'%s' is not an internal zone of '%s'", peeled.localPath, *peeled.hostPath).atPos(pos).debugThrow(); } // Get tree SHA (handles recursion internally) auto treeSha = state.getWorldTreeSha(zonePath); // ... 
rest of existing logic, but wrap accessor with ZoneFilteringAccessor } ``` ### Phase 6: Updated `mountZoneByTreeSha` **Modify in `src/libexpr/eval.cc`:** ```cpp StorePath EvalState::mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath) { // ... existing cache check ... auto repo = getWorldRepo(); GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; auto rawAccessor = repo->getAccessor(treeSha, opts, "zone"); // NEW: Wrap with _internal filter auto accessor = make_ref(rawAccessor); // ... rest of existing logic ... } ``` ### Phase 7: Dirty Zone Detection for Internal Zones **Modify `getTectonixDirtyZones` in `src/libexpr/eval.cc`:** This is trickier because we need to: 1. Detect dirty files in the checkout 2. Map them to zones (including internal zones) 3. A file at `a/b/_internal/c/foo.nix` means zone `//a/b/_internal/c` is dirty ```cpp // When processing dirty files, check if path contains _internal // and attribute dirtiness to the correct internal zone for (auto & dirtyFile : dirtyFiles) { auto zonePath = findEnclosingZone(dirtyFile, allManifests); dirtyZones[zonePath] = true; } ``` --- ## Summary of Changes | Component | Change | |-----------|--------| | Zone path parsing | Add `peelZonePath()` function | | Tree SHA computation | Recursive resolution for internal zones | | Manifest lookup | Support internal manifests relative to host zones | | Source accessor | Filter `_internal` at all levels | | Zone validation | Check appropriate manifest (root vs internal) | | Dirty detection | Attribute dirty files to correct zone level | --- ## Design Elegance The elegance comes from: 1. **One grammar** for all zone paths 2. **One algorithm** (peel + recurse) for all resolution depths 3. **One filter** (`_internal` everywhere) for all source access 4. 
**Relative paths** in internal manifests (no duplication of host path) --- ## Edge Cases ### Zone path with consecutive `_internal` `//a/_internal/_internal/b` — This shouldn't happen by design (manifest would declare `_internal/b`, not `_internal`). Should error gracefully. ### Missing internal manifest Error clearly: "Zone X does not have an internal manifest" ### Zone references itself Not possible with the manifest structure. ### Circular internal zones Not possible — each `_internal` is strictly nested deeper. ### Dirty zone detection for internal zones Need to check if the internal zone's files are dirty. The host zone being dirty doesn't mean the internal zone is dirty. --- ## Future Considerations: Access Control The design mentions that internal zones are "only readable from the zone that encloses them or their co-internal cousins." This access control could be enforced at: 1. **Nix expression level** — The code that uses these builtins enforces who can call them 2. **Builtin level** — Add a "caller zone" context and validate access This is deferred to a future phase. Co-Authored-By: Claude Opus 4.5 --- NESTED-ZONES.md | 392 ++++++++++++++++++++++++++++++++ src/libexpr/eval.cc | 190 +++++++++++++++- src/libexpr/primops/tectonix.cc | 109 ++++++++- 3 files changed, 672 insertions(+), 19 deletions(-) create mode 100644 NESTED-ZONES.md diff --git a/NESTED-ZONES.md b/NESTED-ZONES.md new file mode 100644 index 00000000000..1ca398617cd --- /dev/null +++ b/NESTED-ZONES.md @@ -0,0 +1,392 @@ +# Nested Zones Design and Implementation Plan + +This document describes the design for **nested (internal) zones** - zones that exist within other zones, providing encapsulation and modularity. 
+ +## Overview + +Internal zones are: +- **Hidden** from their host zone's lazy-tree source (as if `_internal` doesn't exist) +- **Addressable** as first-class zones via extended paths like `//a/b/c/_internal/d/e` +- **Recursively nestable** - internal zones can have their own `_internal` with more zones + +### Constraints + +The `_internal` directory must contain precisely: +1. A `manifest.json` +2. Zone directories +3. No other files + +Internal zones are only readable from: +- The enclosing zone +- Co-internal cousins within that enclosing zone + +--- + +## Zone Path Algebra + +### Grammar + +``` +zone_path ::= top_level | internal +top_level ::= "//" segments +internal ::= zone_path "/_internal/" segments +segments ::= name ("/" name)* +``` + +This grammar reveals the key insight: **an internal zone path is recursive** - the host of an internal zone can itself be an internal zone. + +### Examples + +| Path | Host | Internal Path | +|------|------|---------------| +| `//areas/tools/tec` | (root manifest) | — | +| `//areas/tools/tec/_internal/helpers` | `//areas/tools/tec` | `helpers` | +| `//areas/tools/tec/_internal/a/b/_internal/c` | `//areas/tools/tec/_internal/a/b` | `c` | + +### The "Peel" Operation + +Every zone path can be **peeled** into at most one layer: + +```cpp +struct PeeledZonePath { + std::optional hostPath; // nullopt for top-level + std::string localPath; // The path to look up in manifest +}; + +PeeledZonePath peel(std::string_view path) { + auto pos = path.rfind("/_internal/"); + if (pos == std::string_view::npos) { + return {.hostPath = std::nullopt, .localPath = std::string(path)}; + } + return { + .hostPath = std::string(path.substr(0, pos)), + .localPath = std::string(path.substr(pos + 11)) // skip "/_internal/" + }; +} +``` + +This is elegant because: +- `peel("//a/b/c")` → `{nullopt, "//a/b/c"}` — top-level +- `peel("//a/b/_internal/c")` → `{"//a/b", "c"}` — one level of nesting +- `peel("//a/_internal/b/_internal/c")` → 
`{"//a/_internal/b", "c"}` — recursive host + +--- + +## Resolution Algorithm + +``` +resolveZone(path): + peeled = peel(path) + + if peeled.hostPath is null: + # Top-level zone: use root manifest + manifest = readRootManifest() + assert peeled.localPath in manifest + treeSha = computeTreeShaFromWorldRoot(peeled.localPath) + return Zone(path, treeSha, manifest[peeled.localPath].id) + + # Internal zone: resolve host first (recursive!) + hostZone = resolveZone(peeled.hostPath) + + # Read host's internal manifest + internalManifest = readFile(hostZone.tree, "_internal/manifest.json") + assert peeled.localPath in internalManifest + + # Compute tree SHA relative to host + treeSha = getSubtreeSha(hostZone.treeSha, "_internal/" + peeled.localPath) + + return Zone(path, treeSha, internalManifest[peeled.localPath].id) +``` + +The beauty: **one algorithm handles arbitrary nesting depth** through recursion. + +--- + +## Source Filtering: The Disappearing `_internal` + +Every zone's source accessor must filter out `_internal` directories **at every level**: + +```cpp +class ZoneFilteringAccessor : public FilteringSourceAccessor { + bool isAllowed(const CanonPath & path) override { + // Check each path component + for (auto it = path.begin(); it != path.end(); ++it) { + if (*it == "_internal") + return false; + } + return true; + } +}; +``` + +This means: +- `//a/b/c` sees everything EXCEPT any `_internal` subdirectories +- `//a/b/c/_internal/d` sees everything EXCEPT any `_internal` subdirectories within it +- Each zone is hermetically sealed from its internal zones + +--- + +## Manifest Structure + +**Root manifest** (`//.meta/manifest.json`): +```json +{ + "//areas/tools/tec": {"id": "W-123456"}, + "//areas/platform/core": {"id": "W-789abc"} +} +``` + +**Internal manifest** (`//areas/tools/tec/_internal/manifest.json`): +```json +{ + "helpers": {"id": "W-def000"}, + "test-utils": {"id": "W-def001"}, + "deeply/nested/thing": {"id": "W-def002"} +} +``` + +Note: Internal 
manifest paths are **relative** (no `//` prefix). + +--- + +## Implementation Plan + +### Phase 1: Zone Path Parsing Infrastructure + +**File: `src/libexpr/primops/tectonix.cc`** + +```cpp +namespace { + +struct PeeledZonePath { + std::optional hostPath; + std::string localPath; + + bool isInternal() const { return hostPath.has_value(); } +}; + +PeeledZonePath peelZonePath(std::string_view path) { + auto pos = path.rfind("/_internal/"); + if (pos == std::string_view::npos) { + return {.hostPath = std::nullopt, .localPath = std::string(path)}; + } + return { + .hostPath = std::string(path.substr(0, pos)), + .localPath = std::string(path.substr(pos + 11)) + }; +} + +} // anonymous namespace +``` + +### Phase 2: Internal Manifest Reading + +**Add to `src/libexpr/primops/tectonix.cc`:** + +```cpp +static std::optional readInternalManifest( + EvalState & state, + const Hash & hostTreeSha) +{ + auto repo = state.getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + auto accessor = repo->getAccessor(hostTreeSha, opts, "host"); + + auto manifestPath = CanonPath("_internal/manifest.json"); + if (!accessor->pathExists(manifestPath)) + return std::nullopt; + + return nlohmann::json::parse(accessor->readFile(manifestPath)); +} +``` + +### Phase 3: Recursive Tree SHA Computation + +**Modify `EvalState::getWorldTreeSha` in `src/libexpr/eval.cc`:** + +```cpp +Hash EvalState::getWorldTreeSha(std::string_view zonePath) const +{ + auto peeled = peelZonePath(zonePath); + + if (!peeled.isInternal()) { + // Existing top-level logic (unchanged) + return computeTreeShaFromWorldRoot(peeled.localPath); + } + + // Internal zone: recursive resolution + auto hostTreeSha = getWorldTreeSha(*peeled.hostPath); + auto repo = getWorldRepo(); + + // Navigate: hostTree -> _internal -> localPath + auto internalTreeSha = repo->getSubtreeSha(hostTreeSha, "_internal"); + + // Walk through localPath segments + for (auto & segment : tokenizeString>(peeled.localPath, "/")) 
{ + internalTreeSha = repo->getSubtreeSha(internalTreeSha, segment); + } + + return internalTreeSha; +} +``` + +### Phase 4: Zone Filtering Accessor + +**Add to `src/libfetchers/filtering-source-accessor.cc` or inline:** + +```cpp +class ZoneFilteringAccessor : public FilteringSourceAccessor { +public: + ZoneFilteringAccessor(ref next) + : FilteringSourceAccessor(std::move(next), makeNotAllowedError) {} + +private: + static MakeNotAllowedError makeNotAllowedError(const CanonPath & path) { + return RestrictedPathError( + fmt("'%s' is hidden (inside _internal)", path)); + } + + bool isAllowed(const CanonPath & path) override { + for (auto it = path.begin(); it != path.end(); ++it) { + if (*it == "_internal") + return false; + } + return true; + } +}; +``` + +### Phase 5: Updated Zone Resolution + +**Modify `prim_unsafeTectonixInternalZone` in `src/libexpr/primops/tectonix.cc`:** + +```cpp +static void prim_unsafeTectonixInternalZone(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, "..."); + auto peeled = peelZonePath(zonePath); + + // Validate zone exists in appropriate manifest + if (!peeled.isInternal()) { + // Top-level: check root manifest (existing logic) + auto manifest = readRootManifest(state, pos); + if (!manifest.contains(std::string(zonePath))) + state.error("'%s' is not a zone", zonePath).atPos(pos).debugThrow(); + } else { + // Internal: resolve host, check its internal manifest + auto hostTreeSha = state.getWorldTreeSha(*peeled.hostPath); + auto internalManifest = readInternalManifest(state, hostTreeSha); + + if (!internalManifest) + state.error("zone '%s' has no internal manifest", *peeled.hostPath) + .atPos(pos).debugThrow(); + + if (!internalManifest->contains(peeled.localPath)) + state.error("'%s' is not an internal zone of '%s'", + peeled.localPath, *peeled.hostPath).atPos(pos).debugThrow(); + } + + // Get tree SHA (handles recursion internally) + auto treeSha = 
state.getWorldTreeSha(zonePath); + + // ... rest of existing logic, but wrap accessor with ZoneFilteringAccessor +} +``` + +### Phase 6: Updated `mountZoneByTreeSha` + +**Modify in `src/libexpr/eval.cc`:** + +```cpp +StorePath EvalState::mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath) +{ + // ... existing cache check ... + + auto repo = getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto rawAccessor = repo->getAccessor(treeSha, opts, "zone"); + + // NEW: Wrap with _internal filter + auto accessor = make_ref(rawAccessor); + + // ... rest of existing logic ... +} +``` + +### Phase 7: Dirty Zone Detection for Internal Zones + +**Modify `getTectonixDirtyZones` in `src/libexpr/eval.cc`:** + +This is trickier because we need to: +1. Detect dirty files in the checkout +2. Map them to zones (including internal zones) +3. A file at `a/b/_internal/c/foo.nix` means zone `//a/b/_internal/c` is dirty + +```cpp +// When processing dirty files, check if path contains _internal +// and attribute dirtiness to the correct internal zone + +for (auto & dirtyFile : dirtyFiles) { + auto zonePath = findEnclosingZone(dirtyFile, allManifests); + dirtyZones[zonePath] = true; +} +``` + +--- + +## Summary of Changes + +| Component | Change | +|-----------|--------| +| Zone path parsing | Add `peelZonePath()` function | +| Tree SHA computation | Recursive resolution for internal zones | +| Manifest lookup | Support internal manifests relative to host zones | +| Source accessor | Filter `_internal` at all levels | +| Zone validation | Check appropriate manifest (root vs internal) | +| Dirty detection | Attribute dirty files to correct zone level | + +--- + +## Design Elegance + +The elegance comes from: + +1. **One grammar** for all zone paths +2. **One algorithm** (peel + recurse) for all resolution depths +3. **One filter** (`_internal` everywhere) for all source access +4. 
**Relative paths** in internal manifests (no duplication of host path) + +--- + +## Edge Cases + +### Zone path with consecutive `_internal` + +`//a/_internal/_internal/b` — This shouldn't happen by design (manifest would declare `_internal/b`, not `_internal`). Should error gracefully. + +### Missing internal manifest + +Error clearly: "Zone X does not have an internal manifest" + +### Zone references itself + +Not possible with the manifest structure. + +### Circular internal zones + +Not possible — each `_internal` is strictly nested deeper. + +### Dirty zone detection for internal zones + +Need to check if the internal zone's files are dirty. The host zone being dirty doesn't mean the internal zone is dirty. + +--- + +## Future Considerations: Access Control + +The design mentions that internal zones are "only readable from the zone that encloses them or their co-internal cousins." This access control could be enforced at: + +1. **Nix expression level** — The code that uses these builtins enforces who can call them +2. **Builtin level** — Add a "caller zone" context and validate access + +This is deferred to a future phase. diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 7180825e5b8..44a62a8c5de 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -478,8 +478,113 @@ bool EvalState::isTectonixSourceAvailable() const return !settings.tectonixCheckoutPath.get().empty(); } +// ============================================================================ +// Zone Path Parsing (for internal zone support) +// ============================================================================ + +namespace { + +/** + * Result of peeling a zone path into host and local components. 
+ */ +struct PeeledZonePath { + std::optional<std::string> hostPath; // nullopt for top-level zones + std::string localPath; // The path to look up in manifest + + bool isInternal() const { return hostPath.has_value(); } +}; + +/** + * Peel a zone path to extract the innermost internal zone layer. + * + * Uses rfind to find the rightmost "/_internal/" marker: + * - peel("//a/b/c") → {nullopt, "//a/b/c"} — top-level + * - peel("//a/b/_internal/c") → {"//a/b", "c"} — one level of nesting + * - peel("//a/_internal/b/_internal/c") → {"//a/_internal/b", "c"} — recursive host + */ +PeeledZonePath peelZonePath(std::string_view path) { + constexpr std::string_view marker = "/_internal/"; + auto pos = path.rfind(marker); + + if (pos == std::string_view::npos) { + return {.hostPath = std::nullopt, .localPath = std::string(path)}; + } + + return { + .hostPath = std::string(path.substr(0, pos)), + .localPath = std::string(path.substr(pos + marker.size())) + }; +} + +// ============================================================================ +// Zone Filtering Accessor (filters _internal directories) +// ============================================================================ + +/** + * A filtering source accessor that hides `_internal` directories. + * + * Every zone's source accessor filters out `_internal` directories at every level, + * ensuring that zones are hermetically sealed from their internal zones.
+ * + * Example: + * - `//a/b/c` sees everything EXCEPT any `_internal` subdirectories + * - `//a/b/c/_internal/d` sees everything EXCEPT any `_internal` subdirectories within it + */ +class ZoneFilteringAccessor : public FilteringSourceAccessor { +public: + ZoneFilteringAccessor(ref<SourceAccessor> next) + : FilteringSourceAccessor(SourcePath(next), makeNotAllowedError) + { + } + +private: + static RestrictedPathError makeNotAllowedError(const CanonPath & path) { + return RestrictedPathError("'%s' is hidden (inside _internal)", path); + } + + bool isAllowed(const CanonPath & path) override { + // Check each path component for _internal + for (auto & component : path) { + if (component == "_internal") + return false; + } + return true; + } +}; + +} // anonymous namespace + +// ============================================================================ +// World Tree SHA Resolution (with internal zone support) +// ============================================================================ + +/** + * Get tree SHA for a world path, including internal ("/_internal/") zone paths. + * Internal zones are resolved recursively through their host zone; top-level paths walk from the git root. + */ Hash EvalState::getWorldTreeSha(std::string_view worldPath) const { + auto peeled = peelZonePath(worldPath); + + if (peeled.isInternal()) { + // Internal zone: resolve host first (recursive!)
+ auto hostTreeSha = getWorldTreeSha(*peeled.hostPath); + auto repo = getWorldRepo(); + + // Navigate: hostTree -> _internal -> localPath + auto internalTreeSha = repo->getSubtreeSha(hostTreeSha, "_internal"); + + // Walk through localPath segments + Hash currentSha = internalTreeSha; + for (auto & segment : tokenizeString<std::vector<std::string>>(peeled.localPath, "/")) { + if (segment.empty()) continue; + currentSha = repo->getSubtreeSha(currentSha, segment); + } + + return currentSha; + } + + // Top-level zone: use original logic // Normalize path (remove leading //) std::string path(worldPath); if (hasPrefix(path, "//")) @@ -626,6 +731,48 @@ const std::map<std::string, bool> & EvalState::getTectonixDirtyZones() const dirtyZones[zonePath] = false; } + // Helper function to recursively discover and register internal zones + std::function<void(const std::string &, const Hash &)> discoverInternalZones; + discoverInternalZones = [&](const std::string & hostZonePath, const Hash & hostTreeSha) { + // Check if host zone has an internal manifest + auto hostAccessor = repo->getAccessor(hostTreeSha, opts, "host"); + auto internalManifestPath = CanonPath("_internal/manifest.json"); + + if (!hostAccessor->pathExists(internalManifestPath)) + return; + + auto internalManifestContent = hostAccessor->readFile(internalManifestPath); + auto internalManifest = nlohmann::json::parse(internalManifestContent); + + // Register each internal zone + for (auto & [localPath, value] : internalManifest.items()) { + std::string internalZonePath = hostZonePath + "/_internal/" + localPath; + dirtyZones[internalZonePath] = false; + + // Recursively discover nested internal zones + try { + auto internalTreeSha = repo->getSubtreeSha(hostTreeSha, "_internal"); + for (auto & segment : tokenizeString<std::vector<std::string>>(localPath, "/")) { + if (!segment.empty()) + internalTreeSha = repo->getSubtreeSha(internalTreeSha, segment); + } + discoverInternalZones(internalZonePath, internalTreeSha); + } catch (...)
{ + // Internal zone tree not found, skip recursion + } + } + }; + + // Discover internal zones for each top-level zone + for (auto & [zoneId, zonePath] : zoneIdToPath) { + try { + auto zoneTreeSha = getWorldTreeSha(zonePath); + discoverInternalZones(zonePath, zoneTreeSha); + } catch (...) { + // Zone tree not found, skip internal zone discovery + } + } + // Get dirty files via git status (avoids libgit2 reftables issue) auto checkoutPath = settings.tectonixCheckoutPath.get(); auto gitStatusOutput = runProgram("git", true, {"-C", checkoutPath, "status", "--porcelain"}); @@ -635,7 +782,9 @@ const std::map & EvalState::getTectonixDirtyZones() const // Format: "XY filename" where XY is 2-char status auto filePath = "/" + std::string(line.substr(3)); - // Find which zone this file belongs to + // Find the most specific zone this file belongs to + // (internal zones are more specific than their host zones) + std::string bestMatch; for (auto & [zonePath, dirty] : dirtyZones) { // Normalize zone path for comparison (remove leading //) std::string normalizedZonePath = zonePath; @@ -643,10 +792,16 @@ const std::map & EvalState::getTectonixDirtyZones() const normalizedZonePath = normalizedZonePath.substr(1); // keep one / if (hasPrefix(filePath, normalizedZonePath + "/") || filePath == normalizedZonePath) { - dirtyZones[zonePath] = true; - break; + // Prefer longer (more specific) matches + if (normalizedZonePath.size() > bestMatch.size()) { + bestMatch = zonePath; + } } } + + if (!bestMatch.empty()) { + dirtyZones[bestMatch] = true; + } } } } @@ -685,7 +840,10 @@ StorePath EvalState::getZoneStorePath(std::string_view zonePath) // Eager mode: immediate copy from git ODB auto repo = getWorldRepo(); GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; - auto accessor = repo->getAccessor(treeSha, opts, "zone"); + auto rawAccessor = repo->getAccessor(treeSha, opts, "zone"); + + // Wrap with _internal filter to hide internal zones + auto accessor = 
make_ref<ZoneFilteringAccessor>(rawAccessor); std::string name = "zone-" + replaceStrings(zone, "/", "-"); auto storePath = fetchToStore( @@ -716,7 +874,10 @@ StorePath EvalState::mountZoneByTreeSha(const Hash & treeSha, std::string_view z // Not cached: create accessor and mount auto repo = getWorldRepo(); GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; - auto accessor = repo->getAccessor(treeSha, opts, "zone"); + auto rawAccessor = repo->getAccessor(treeSha, opts, "zone"); + + // Wrap with _internal filter to hide internal zones from this zone's view + auto accessor = make_ref<ZoneFilteringAccessor>(rawAccessor); // Generate name from zone path std::string zone(zonePath); @@ -728,7 +889,7 @@ StorePath EvalState::mountZoneByTreeSha(const Hash & treeSha, std::string_view z auto storePath = StorePath::random(name); allowPath(storePath); - // Mount accessor at this path + // Mount the filtered accessor at this path storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); // Cache it (thread-safe) @@ -759,13 +920,15 @@ StorePath EvalState::getZoneFromCheckout(std::string_view zonePath) if (!settings.lazyTrees) { // Eager mode: immediate copy from checkout - auto checkoutAccessor = getWorldCheckoutAccessor(); - if (!checkoutAccessor) - throw Error("checkout accessor not available for dirty zone '%s'", zonePath); + // Create an accessor rooted at the zone directory + auto rawAccessor = makeFSSourceAccessor(fullPath); + + // Wrap with _internal filter to hide internal zones + auto accessor = make_ref<ZoneFilteringAccessor>(rawAccessor); auto storePath = fetchToStore( fetchSettings, *store, - SourcePath(*checkoutAccessor, CanonPath(checkoutPath + "/" + zone)), + SourcePath(accessor, CanonPath::root), FetchMode::Copy, name); allowPath(storePath); @@ -783,13 +946,16 @@ StorePath EvalState::getZoneFromCheckout(std::string_view zonePath) } // Not cached: create accessor rooted at zone directory and mount - auto accessor = makeFSSourceAccessor(fullPath); + auto rawAccessor = makeFSSourceAccessor(fullPath); + +
// Wrap with _internal filter to hide internal zones from this zone's view + auto accessor = make_ref(rawAccessor); // Create virtual store path auto storePath = StorePath::random(name); allowPath(storePath); - // Mount accessor at this path + // Mount the filtered accessor at this path storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); // Cache it (thread-safe) diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc index 6f684b123b6..765611151ac 100644 --- a/src/libexpr/primops/tectonix.cc +++ b/src/libexpr/primops/tectonix.cc @@ -9,6 +9,82 @@ namespace nix { +// ============================================================================ +// Zone Path Parsing Infrastructure +// ============================================================================ + +/** + * Result of peeling a zone path into host and local components. + * + * For top-level zones like "//a/b/c", hostPath is nullopt and localPath is "//a/b/c". + * For internal zones like "//a/b/_internal/c", hostPath is "//a/b" and localPath is "c". + * For nested internal zones like "//a/_internal/b/_internal/c", hostPath is "//a/_internal/b" and localPath is "c". + */ +struct PeeledZonePath { + std::optional hostPath; // nullopt for top-level zones + std::string localPath; // The path to look up in manifest + + bool isInternal() const { return hostPath.has_value(); } +}; + +/** + * Peel a zone path to extract the innermost internal zone layer. 
+ * + * Uses rfind to find the rightmost "/_internal/" marker: + * - peel("//a/b/c") → {nullopt, "//a/b/c"} — top-level + * - peel("//a/b/_internal/c") → {"//a/b", "c"} — one level of nesting + * - peel("//a/_internal/b/_internal/c") → {"//a/_internal/b", "c"} — recursive host + */ +static PeeledZonePath peelZonePath(std::string_view path) { + constexpr std::string_view marker = "/_internal/"; + auto pos = path.rfind(marker); + + if (pos == std::string_view::npos) { + return {.hostPath = std::nullopt, .localPath = std::string(path)}; + } + + return { + .hostPath = std::string(path.substr(0, pos)), + .localPath = std::string(path.substr(pos + marker.size())) + }; +} + +// ============================================================================ +// Internal Manifest Reading +// ============================================================================ + +/** + * Read the internal manifest from a host zone's tree. + * + * Internal manifests are located at `_internal/manifest.json` within the host zone + * and contain relative paths (no // prefix) mapping to zone IDs. 
+ * + * Example internal manifest: + * { + * "helpers": {"id": "W-def000"}, + * "test-utils": {"id": "W-def001"}, + * "deeply/nested/thing": {"id": "W-def002"} + * } + * + * @param state The evaluation state + * @param hostTreeSha The tree SHA of the host zone + * @return The parsed internal manifest JSON, or nullopt if no internal manifest exists + */ +static std::optional readInternalManifest( + EvalState & state, + const Hash & hostTreeSha) +{ + auto repo = state.getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + auto accessor = repo->getAccessor(hostTreeSha, opts, "host"); + + auto manifestPath = CanonPath("_internal/manifest.json"); + if (!accessor->pathExists(manifestPath)) + return std::nullopt; + + return nlohmann::json::parse(accessor->readFile(manifestPath)); +} + // Helper to read the manifest JSON content static std::string readManifestContent(EvalState & state, const PosIdx pos) { @@ -272,14 +348,33 @@ static void prim_unsafeTectonixInternalZone(EvalState & state, const PosIdx pos, auto zonePath = state.forceStringNoCtx(*args[0], pos, "while evaluating the 'zonePath' argument to builtins.__unsafeTectonixInternalZone"); - // Validate that zonePath is exactly a zone root (exists in manifest) - auto content = readManifestContent(state, pos); - auto manifest = nlohmann::json::parse(content); - if (!manifest.contains(std::string(zonePath))) - state.error("'%s' is not a zone root (must be an exact path from the manifest)", zonePath) - .atPos(pos).debugThrow(); + // Peel the zone path to determine if it's top-level or internal + auto peeled = peelZonePath(zonePath); + + // Validate that zonePath exists in the appropriate manifest + if (!peeled.isInternal()) { + // Top-level zone: check root manifest + auto content = readManifestContent(state, pos); + auto manifest = nlohmann::json::parse(content); + if (!manifest.contains(std::string(zonePath))) + state.error("'%s' is not a zone root (must be an exact path from the 
manifest)", zonePath) + .atPos(pos).debugThrow(); + } else { + // Internal zone: resolve host zone and check its internal manifest + auto hostTreeSha = state.getWorldTreeSha(*peeled.hostPath); + auto internalManifest = readInternalManifest(state, hostTreeSha); + + if (!internalManifest) + state.error("zone '%s' has no internal manifest", *peeled.hostPath) + .atPos(pos).debugThrow(); + + if (!internalManifest->contains(peeled.localPath)) + state.error("'%s' is not an internal zone of '%s'", + peeled.localPath, *peeled.hostPath) + .atPos(pos).debugThrow(); + } - // Get tree SHA before we potentially fetch + // Get tree SHA (handles recursion internally) auto treeSha = state.getWorldTreeSha(zonePath); // Check dirty status