diff --git a/LAZY_TREES_PLAN.md b/LAZY_TREES_PLAN.md new file mode 100644 index 00000000000..0bc846bad16 --- /dev/null +++ b/LAZY_TREES_PLAN.md @@ -0,0 +1,565 @@ +# Tectonix Lazy Trees Integration Plan + +This document outlines the plan to integrate tectonix zone access with Nix's lazy-trees infrastructure, enabling on-demand copying of zone sources to the store. + +## Background + +### Current Behavior + +When `builtins.unsafeTectonixInternalZoneSrc "//areas/tools/tec"` is called, the entire zone is immediately copied to the Nix store via `fetchToStore()`, regardless of whether the zone content is actually needed for a derivation. + +### Lazy Trees in Flakes + +With `lazy-trees = true`, flakes avoid this eager copying: + +1. `mountInput()` creates a random store path and mounts a `GitSourceAccessor` at that path +2. Files are read on-demand from the git ODB during evaluation +3. Only when the path is used as a derivation input does `devirtualize()` copy it to the store + +### Goal + +Apply the same lazy behavior to tectonix zones, while respecting zone boundaries and dirty zone detection. + +--- + +## Architectural Comparison: Flakes vs Tectonix + +### Flakes + +``` +FlakeRef (github:nixos/nixpkgs/abc123) + │ + ▼ +InputCache.getAccessor() + │ + ▼ +Input.getAccessor() → GitSourceAccessor (lazy) + │ + ▼ +mountInput() + │ + ├─► lazyTrees=false: fetchToStore() immediately + │ + └─► lazyTrees=true: + StorePath::random("nixpkgs") + storeFS->mount(storePath, accessor) + return virtual path + +Later, when used in derivation: + │ + ▼ +devirtualize() → fetchToStore() → real store path +``` + +**Key point:** Each flake is its own unit. The accessor is rooted at the flake, and the whole flake gets mounted at one store path. + +### Tectonix Challenge + +``` +world @ sha:abc123 +├── areas/ +│ ├── tools/ +│ │ ├── tec/ ← Zone (tree: deadbeef) +│ │ ├── dev/ ← Zone (tree: cafebabe) +│ │ └── ... +│ └── platform/ +│ └── ... 
+└── .meta/ + └── manifest.json + +Problem: Can't mount whole world at one path! + +Using /nix/store/xxx-world/areas/tools/tec as derivation src +would pull in the ENTIRE world when devirtualized. + +Solution: Mount each zone separately at its own store path. +``` + +### What Makes Tectonix Harder + +1. **Granularity mismatch**: Flakes = one input = one mount. World = one repo = thousands of zones. +2. **No `Input` abstraction**: Flakes have `fetchers::Input` with `getAccessor()`, caching, locking. Tectonix builtins are ad-hoc. +3. **Dirty zone complexity**: Flakes mark dirty inputs as "unlocked". Tectonix needs zone-granular dirty detection with checkout fallback. +4. **Two-mode operation**: Git ODB vs checkout. Flakes only have one source per input. + +### What Makes Tectonix Easier + +1. **Content-addressed by nature**: Tree SHA is the *perfect* cache key. Same tree SHA across different world commits = identical content. +2. **No resolution complexity**: No registries, no indirect references, no lock file management. +3. **Already have the accessor**: `getWorldGitAccessor()` returns a lazy `GitSourceAccessor`. +4. **Single source of truth**: One repo, one commit SHA. + +--- + +## Design + +### Core Concept: Zone Mounts by Tree SHA + +``` +builtins.worldZone "//areas/tools/tec" + │ + ▼ +getZoneStorePath(zonePath) + │ + ├─► isDirty? ─────────────────────────────┐ + │ │ │ + │ ▼ │ + │ getZoneFromCheckout() │ + │ (EXTENSION POINT: eager for now) │ + │ │ │ + │ ▼ │ + │ return store path ◄────────────────────┘ + │ + └─► !isDirty + │ + ▼ + treeSha = getWorldTreeSha(zonePath) + │ + ▼ + mountZoneByTreeSha(treeSha) + │ + ├─► cached? 
return cached store path + │ + └─► not cached: + accessor = repo->getAccessor(treeSha) + storePath = StorePath::random(name) + storeFS->mount(storePath, accessor) + cache[treeSha] = storePath + return storePath +``` + +### Why Tree SHA as Cache Key + +``` +World @ v1 (sha: aaa) World @ v2 (sha: bbb) +├── areas/tools/tec ├── areas/tools/tec +│ (tree: deadbeef) ─────────────│ (tree: deadbeef) ← SAME! +│ │ +├── areas/tools/dev ├── areas/tools/dev +│ (tree: cafebabe) │ (tree: 12345678) ← Changed +``` + +If `//areas/tools/tec` didn't change between commits, its tree SHA is identical. The zone cache returns the same virtual store path, and when devirtualized, the same real store path. **Natural deduplication across world revisions.** + +--- + +## Implementation + +### Phase 1: Core Infrastructure + +#### 1.1 EvalState Additions (`src/libexpr/include/nix/expr/eval.hh`) + +```cpp +// In EvalState class: + +private: + /** + * Cache tree SHA → virtual store path for lazy zone mounts. + * Thread-safe for eval-cores > 1. + */ + Sync> tectonixZoneCache_; + +public: + /** + * Get a zone's store path, handling dirty detection and lazy mounting. + * + * For clean zones with lazy-trees enabled: mounts accessor lazily + * For dirty zones: currently eager-copies from checkout (extension point) + * For lazy-trees disabled: eager-copies from git + */ + StorePath getZoneStorePath(std::string_view zonePath); + +private: + /** + * Mount a zone by tree SHA, returning a (potentially virtual) store path. + * Caches by tree SHA for deduplication across world revisions. + */ + StorePath mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath); + + /** + * Get zone store path from checkout (for dirty zones). + * EXTENSION POINT: Currently always eager. Could be made lazy later. 
+ */ + StorePath getZoneFromCheckout(std::string_view zonePath); +``` + +#### 1.2 Implementation (`src/libexpr/eval.cc`) + +```cpp +StorePath EvalState::getZoneStorePath(std::string_view zonePath) +{ + // Normalize path + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + // Check dirty status + bool isDirty = false; + if (isTectonixSourceAvailable()) { + auto & dirtyZones = getTectonixDirtyZones(); + auto it = dirtyZones.find(std::string(zonePath)); + isDirty = it != dirtyZones.end() && it->second; + } + + if (isDirty) { + // EXTENSION POINT: For now, always eager from checkout + return getZoneFromCheckout(zonePath); + } + + // Clean zone: get tree SHA + auto treeSha = getWorldTreeSha(zonePath); + + if (!settings.lazyTrees) { + // Eager mode: immediate copy from git ODB + auto repo = getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto accessor = repo->getAccessor(treeSha, opts, "zone"); + + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + auto storePath = fetchToStore( + fetchSettings, *store, + SourcePath(accessor, CanonPath::root), + FetchMode::Copy, name); + + allowPath(storePath); + return storePath; + } + + // Lazy mode: mount by tree SHA + return mountZoneByTreeSha(treeSha, zonePath); +} + +StorePath EvalState::mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath) +{ + // Check cache first (thread-safe) + { + auto cache = tectonixZoneCache_.readLock(); + auto it = cache->find(treeSha); + if (it != cache->end()) { + debug("zone cache hit for tree %s", treeSha.gitRev()); + return it->second; + } + } + + // Not cached: create accessor and mount + auto repo = getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto accessor = repo->getAccessor(treeSha, opts, "zone"); + + // Generate name from zone path + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + std::string name = "zone-" + 
replaceStrings(zone, "/", "-"); + + // Create virtual store path + auto storePath = StorePath::random(name); + allowPath(storePath); + + // Mount accessor at this path + storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); + + // Cache it (thread-safe) + { + auto cache = tectonixZoneCache_.lock(); + auto [it, inserted] = cache->try_emplace(treeSha, storePath); + if (!inserted) { + // Another thread beat us, use their path + return it->second; + } + } + + debug("mounted zone %s (tree %s) at %s", + zonePath, treeSha.gitRev(), store->printStorePath(storePath)); + + return storePath; +} + +StorePath EvalState::getZoneFromCheckout(std::string_view zonePath) +{ + // EXTENSION POINT: Currently always eager. + // + // To make this lazy later, we'd need to: + // 1. Create a filtered accessor over the checkout path + // 2. Compute a content key (hash of modified files? mtime-based?) + // 3. Cache and mount like mountZoneByTreeSha + // + // For now: just copy from checkout. + + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + auto checkoutAccessor = getWorldCheckoutAccessor(); + if (!checkoutAccessor) + throw Error("checkout accessor not available for dirty zone '%s'", zonePath); + + auto checkoutPath = settings.tectonixCheckoutPath.get(); + auto fullPath = CanonPath(checkoutPath + "/" + zone); + + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + + auto storePath = fetchToStore( + fetchSettings, *store, + SourcePath(*checkoutAccessor, fullPath), + FetchMode::Copy, name); + + allowPath(storePath); + return storePath; +} +``` + +### Phase 2: Updated Builtins (`src/libexpr/primops/tectonix.cc`) + +#### 2.1 Simplify `prim_unsafeTectonixInternalZoneSrc` + +```cpp +static void prim_unsafeTectonixInternalZoneSrc(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'zonePath' argument to 
builtins.unsafeTectonixInternalZoneSrc"); + + auto storePath = state.getZoneStorePath(zonePath); + state.allowAndSetStorePathString(storePath, v); +} +``` + +#### 2.2 New `prim_worldZone` (flake-like interface) + +```cpp +static void prim_worldZone(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'zonePath' argument to builtins.worldZone"); + + // Get tree SHA before we potentially fetch + auto treeSha = state.getWorldTreeSha(zonePath); + + // Check dirty status + bool isDirty = false; + if (state.isTectonixSourceAvailable()) { + auto & dirtyZones = state.getTectonixDirtyZones(); + auto it = dirtyZones.find(std::string(zonePath)); + isDirty = it != dirtyZones.end() && it->second; + } + + auto storePath = state.getZoneStorePath(zonePath); + auto storePathStr = state.store->printStorePath(storePath); + + // Build result attrset (like fetchTree) + auto attrs = state.buildBindings(4); + + attrs.alloc("outPath").mkString(storePathStr, { + NixStringContextElem::Opaque{storePath} + }); + attrs.alloc("treeSha").mkString(treeSha.gitRev(), state.mem); + attrs.alloc("zonePath").mkString(zonePath, state.mem); + attrs.alloc("dirty").mkBool(isDirty); + + v.mkAttrs(attrs); +} + +static RegisterPrimOp primop_worldZone({ + .name = "worldZone", + .args = {"zonePath"}, + .doc = R"( + Get a zone from the world repository. + + Returns an attrset with: + - outPath: Store path containing zone source (lazy with lazy-trees) + - treeSha: Git tree SHA for this zone + - zonePath: The zone path argument + - dirty: Whether the zone has uncommitted changes + + Example: `builtins.worldZone "//areas/tools/tec"` + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. 
+ )", + .fun = prim_worldZone, +}); +``` + +#### 2.3 New `prim_worldRoot` (read-only world access) + +```cpp +static void prim_worldRoot(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + // Lazily mount the whole world accessor once per evaluation + auto storePath = state.getOrMountWorldRoot(); + + v.mkPath(state.rootPath( + CanonPath(state.store->printStorePath(storePath)))); +} + +static RegisterPrimOp primop_worldRoot({ + .name = "worldRoot", + .args = {}, + .doc = R"( + Get a path to the world repository root. + + This path can be used for reading files during evaluation: + + let world = builtins.worldRoot; + in import (world + "/areas/tools/tec/zone.nix") + + WARNING: Do not use this path directly as a derivation src! + That would copy the entire world to the store. Use + builtins.worldZone for derivation sources. + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. + )", + .fun = prim_worldRoot, +}); +``` + +With supporting method in `EvalState`: + +```cpp +StorePath EvalState::getOrMountWorldRoot() +{ + // Thread-safe lazy initialization + static std::once_flag mounted; + static StorePath worldStorePath; + + std::call_once(mounted, [this]() { + auto accessor = getWorldGitAccessor(); + worldStorePath = StorePath::random("world"); + allowPath(worldStorePath); + storeFS->mount( + CanonPath(store->printStorePath(worldStorePath)), + accessor); + }); + + return worldStorePath; +} +``` + +--- + +## Usage Examples + +### Before (eager) + +```nix +let + zoneSrc = builtins.unsafeTectonixInternalZoneSrc "//areas/tools/tec"; + # ^ Entire zone copied to store immediately +in +mkDerivation { + src = zoneSrc; + ... 
+} +``` + +### After (lazy) + +```nix +let + world = builtins.worldRoot; + + # Read-only access (no store copy during evaluation) + zoneNix = import (world + "/areas/tools/tec/zone.nix"); + manifest = builtins.fromJSON (builtins.readFile (world + "/.meta/manifest.json")); + + # For derivation src, use worldZone (zone-granular lazy copy) + tecZone = builtins.worldZone "//areas/tools/tec"; +in +mkDerivation { + src = tecZone.outPath; # Only copied when derivation is built + ... +} +``` + +--- + +## Builtin Migration Guide + +| Old Pattern | New Pattern | +|-------------|-------------| +| `__unsafeTectonixInternalZoneSrc path` | `(worldZone path).outPath` | +| `__unsafeTectonixInternalTreeSha path` then `__unsafeTectonixInternalTree sha` | `(worldZone path).outPath` | +| `__unsafeTectonixInternalFile path` | `builtins.readFile (worldRoot + path)` | +| `__unsafeTectonixInternalDir zone subpath` | `builtins.readDir (worldRoot + zone + "/" + subpath)` | + +The `__unsafeTectonixInternalTree` builtin can be retained for edge cases (fetching arbitrary tree SHAs not corresponding to zones), but becomes less central. + +--- + +## Extension Point: Lazy Dirty Zones + +The `getZoneFromCheckout()` function is the clear extension point for future optimization. + +### Current Behavior + +Dirty zones are always eagerly copied from checkout: + +```cpp +StorePath EvalState::getZoneFromCheckout(std::string_view zonePath) +{ + // Always eager for now + return fetchToStore(...); +} +``` + +### Future Options + +1. **Content-hash dirty files** + - Walk checkout, hash modified files + - Use combined hash as cache key + - Complex but accurate + +2. **Overlay accessor** + - Base: git ODB accessor for zone + - Overlay: checkout accessor filtered to dirty files + - Mount the composite accessor + - Cache key: `(treeSha, set of dirty file paths)` + +3. 
**Mtime-based caching** + - Use checkout accessor with mtime as cache key + - Simpler but may re-copy on unrelated file touches + +The interface is clean: `getZoneStorePath()` decides dirty vs clean and delegates appropriately. The dirty path can be made lazy without changing callers. + +--- + +## Testing Plan + +1. **Lazy-trees enabled, clean zone** + - Verify virtual store path is created + - Verify no immediate copy to store + - Verify devirtualization on derivation build + +2. **Lazy-trees disabled** + - Verify immediate copy (current behavior preserved) + +3. **Dirty zones** + - Verify fallback to checkout + - Verify eager copy (for now) + +4. **Cache behavior** + - Same tree SHA returns same virtual path + - Different tree SHA returns different path + - Thread-safe with `eval-cores > 1` + +5. **Cross-world-revision deduplication** + - Zone unchanged between commits → same devirtualized store path + +--- + +## Summary + +| Component | Purpose | +|-----------|---------| +| `tectonixZoneCache_` | Tree SHA → virtual store path mapping | +| `getZoneStorePath()` | Orchestrator: dirty detection → dispatch | +| `mountZoneByTreeSha()` | Lazy mount for clean zones | +| `getZoneFromCheckout()` | Eager (for now) for dirty zones - **extension point** | +| `worldZone` | High-level builtin returning attrset | +| `worldRoot` | Read-only world access path | + +This design: +- Integrates cleanly with existing lazy-trees infrastructure +- Uses tree SHA for natural content-addressed caching +- Leaves clear extension point for dirty zone optimization +- Provides flake-like API consistency +- Enables `worldRoot` for ergonomic read-only access diff --git a/NESTED-ZONES.md b/NESTED-ZONES.md new file mode 100644 index 00000000000..1ca398617cd --- /dev/null +++ b/NESTED-ZONES.md @@ -0,0 +1,392 @@ +# Nested Zones Design and Implementation Plan + +This document describes the design for **nested (internal) zones** - zones that exist within other zones, providing encapsulation and 
modularity. + +## Overview + +Internal zones are: +- **Hidden** from their host zone's lazy-tree source (as if `_internal` doesn't exist) +- **Addressable** as first-class zones via extended paths like `//a/b/c/_internal/d/e` +- **Recursively nestable** - internal zones can have their own `_internal` with more zones + +### Constraints + +The `_internal` directory must contain precisely: +1. A `manifest.json` +2. Zone directories +3. No other files + +Internal zones are only readable from: +- The enclosing zone +- Co-internal cousins within that enclosing zone + +--- + +## Zone Path Algebra + +### Grammar + +``` +zone_path ::= top_level | internal +top_level ::= "//" segments +internal ::= zone_path "/_internal/" segments +segments ::= name ("/" name)* +``` + +This grammar reveals the key insight: **an internal zone path is recursive** - the host of an internal zone can itself be an internal zone. + +### Examples + +| Path | Host | Internal Path | +|------|------|---------------| +| `//areas/tools/tec` | (root manifest) | — | +| `//areas/tools/tec/_internal/helpers` | `//areas/tools/tec` | `helpers` | +| `//areas/tools/tec/_internal/a/b/_internal/c` | `//areas/tools/tec/_internal/a/b` | `c` | + +### The "Peel" Operation + +**Peeling** a zone path strips exactly one layer — the innermost (rightmost) `/_internal/` marker — splitting the path into an optional host path and a local path; a top-level path has no layer to strip: + +```cpp +struct PeeledZonePath { + std::optional<std::string> hostPath; // nullopt for top-level + std::string localPath; // The path to look up in manifest +}; + +PeeledZonePath peel(std::string_view path) { + auto pos = path.rfind("/_internal/"); + if (pos == std::string_view::npos) { + return {.hostPath = std::nullopt, .localPath = std::string(path)}; + } + return { + .hostPath = std::string(path.substr(0, pos)), + .localPath = std::string(path.substr(pos + 11)) // skip "/_internal/" + }; +} +``` + +This is elegant because: +- `peel("//a/b/c")` → `{nullopt, "//a/b/c"}` — top-level +- `peel("//a/b/_internal/c")` → `{"//a/b", "c"}` — one level of nesting +- `peel("//a/_internal/b/_internal/c")` → 
`{"//a/_internal/b", "c"}` — recursive host + +--- + +## Resolution Algorithm + +``` +resolveZone(path): + peeled = peel(path) + + if peeled.hostPath is null: + # Top-level zone: use root manifest + manifest = readRootManifest() + assert peeled.localPath in manifest + treeSha = computeTreeShaFromWorldRoot(peeled.localPath) + return Zone(path, treeSha, manifest[peeled.localPath].id) + + # Internal zone: resolve host first (recursive!) + hostZone = resolveZone(peeled.hostPath) + + # Read host's internal manifest + internalManifest = readFile(hostZone.tree, "_internal/manifest.json") + assert peeled.localPath in internalManifest + + # Compute tree SHA relative to host + treeSha = getSubtreeSha(hostZone.treeSha, "_internal/" + peeled.localPath) + + return Zone(path, treeSha, internalManifest[peeled.localPath].id) +``` + +The beauty: **one algorithm handles arbitrary nesting depth** through recursion. + +--- + +## Source Filtering: The Disappearing `_internal` + +Every zone's source accessor must filter out `_internal` directories **at every level**: + +```cpp +class ZoneFilteringAccessor : public FilteringSourceAccessor { + bool isAllowed(const CanonPath & path) override { + // Check each path component + for (auto it = path.begin(); it != path.end(); ++it) { + if (*it == "_internal") + return false; + } + return true; + } +}; +``` + +This means: +- `//a/b/c` sees everything EXCEPT any `_internal` subdirectories +- `//a/b/c/_internal/d` sees everything EXCEPT any `_internal` subdirectories within it +- Each zone is hermetically sealed from its internal zones + +--- + +## Manifest Structure + +**Root manifest** (`//.meta/manifest.json`): +```json +{ + "//areas/tools/tec": {"id": "W-123456"}, + "//areas/platform/core": {"id": "W-789abc"} +} +``` + +**Internal manifest** (`//areas/tools/tec/_internal/manifest.json`): +```json +{ + "helpers": {"id": "W-def000"}, + "test-utils": {"id": "W-def001"}, + "deeply/nested/thing": {"id": "W-def002"} +} +``` + +Note: Internal 
manifest paths are **relative** (no `//` prefix). + +--- + +## Implementation Plan + +### Phase 1: Zone Path Parsing Infrastructure + +**File: `src/libexpr/primops/tectonix.cc`** + +```cpp +namespace { + +struct PeeledZonePath { + std::optional hostPath; + std::string localPath; + + bool isInternal() const { return hostPath.has_value(); } +}; + +PeeledZonePath peelZonePath(std::string_view path) { + auto pos = path.rfind("/_internal/"); + if (pos == std::string_view::npos) { + return {.hostPath = std::nullopt, .localPath = std::string(path)}; + } + return { + .hostPath = std::string(path.substr(0, pos)), + .localPath = std::string(path.substr(pos + 11)) + }; +} + +} // anonymous namespace +``` + +### Phase 2: Internal Manifest Reading + +**Add to `src/libexpr/primops/tectonix.cc`:** + +```cpp +static std::optional readInternalManifest( + EvalState & state, + const Hash & hostTreeSha) +{ + auto repo = state.getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + auto accessor = repo->getAccessor(hostTreeSha, opts, "host"); + + auto manifestPath = CanonPath("_internal/manifest.json"); + if (!accessor->pathExists(manifestPath)) + return std::nullopt; + + return nlohmann::json::parse(accessor->readFile(manifestPath)); +} +``` + +### Phase 3: Recursive Tree SHA Computation + +**Modify `EvalState::getWorldTreeSha` in `src/libexpr/eval.cc`:** + +```cpp +Hash EvalState::getWorldTreeSha(std::string_view zonePath) const +{ + auto peeled = peelZonePath(zonePath); + + if (!peeled.isInternal()) { + // Existing top-level logic (unchanged) + return computeTreeShaFromWorldRoot(peeled.localPath); + } + + // Internal zone: recursive resolution + auto hostTreeSha = getWorldTreeSha(*peeled.hostPath); + auto repo = getWorldRepo(); + + // Navigate: hostTree -> _internal -> localPath + auto internalTreeSha = repo->getSubtreeSha(hostTreeSha, "_internal"); + + // Walk through localPath segments + for (auto & segment : tokenizeString>(peeled.localPath, "/")) 
{ + internalTreeSha = repo->getSubtreeSha(internalTreeSha, segment); + } + + return internalTreeSha; +} +``` + +### Phase 4: Zone Filtering Accessor + +**Add to `src/libfetchers/filtering-source-accessor.cc` or inline:** + +```cpp +class ZoneFilteringAccessor : public FilteringSourceAccessor { +public: + ZoneFilteringAccessor(ref next) + : FilteringSourceAccessor(std::move(next), makeNotAllowedError) {} + +private: + static MakeNotAllowedError makeNotAllowedError(const CanonPath & path) { + return RestrictedPathError( + fmt("'%s' is hidden (inside _internal)", path)); + } + + bool isAllowed(const CanonPath & path) override { + for (auto it = path.begin(); it != path.end(); ++it) { + if (*it == "_internal") + return false; + } + return true; + } +}; +``` + +### Phase 5: Updated Zone Resolution + +**Modify `prim_unsafeTectonixInternalZone` in `src/libexpr/primops/tectonix.cc`:** + +```cpp +static void prim_unsafeTectonixInternalZone(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, "..."); + auto peeled = peelZonePath(zonePath); + + // Validate zone exists in appropriate manifest + if (!peeled.isInternal()) { + // Top-level: check root manifest (existing logic) + auto manifest = readRootManifest(state, pos); + if (!manifest.contains(std::string(zonePath))) + state.error("'%s' is not a zone", zonePath).atPos(pos).debugThrow(); + } else { + // Internal: resolve host, check its internal manifest + auto hostTreeSha = state.getWorldTreeSha(*peeled.hostPath); + auto internalManifest = readInternalManifest(state, hostTreeSha); + + if (!internalManifest) + state.error("zone '%s' has no internal manifest", *peeled.hostPath) + .atPos(pos).debugThrow(); + + if (!internalManifest->contains(peeled.localPath)) + state.error("'%s' is not an internal zone of '%s'", + peeled.localPath, *peeled.hostPath).atPos(pos).debugThrow(); + } + + // Get tree SHA (handles recursion internally) + auto treeSha = 
state.getWorldTreeSha(zonePath); + + // ... rest of existing logic, but wrap accessor with ZoneFilteringAccessor +} +``` + +### Phase 6: Updated `mountZoneByTreeSha` + +**Modify in `src/libexpr/eval.cc`:** + +```cpp +StorePath EvalState::mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath) +{ + // ... existing cache check ... + + auto repo = getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto rawAccessor = repo->getAccessor(treeSha, opts, "zone"); + + // NEW: Wrap with _internal filter + auto accessor = make_ref(rawAccessor); + + // ... rest of existing logic ... +} +``` + +### Phase 7: Dirty Zone Detection for Internal Zones + +**Modify `getTectonixDirtyZones` in `src/libexpr/eval.cc`:** + +This is trickier because we need to: +1. Detect dirty files in the checkout +2. Map them to zones (including internal zones) +3. A file at `a/b/_internal/c/foo.nix` means zone `//a/b/_internal/c` is dirty + +```cpp +// When processing dirty files, check if path contains _internal +// and attribute dirtiness to the correct internal zone + +for (auto & dirtyFile : dirtyFiles) { + auto zonePath = findEnclosingZone(dirtyFile, allManifests); + dirtyZones[zonePath] = true; +} +``` + +--- + +## Summary of Changes + +| Component | Change | +|-----------|--------| +| Zone path parsing | Add `peelZonePath()` function | +| Tree SHA computation | Recursive resolution for internal zones | +| Manifest lookup | Support internal manifests relative to host zones | +| Source accessor | Filter `_internal` at all levels | +| Zone validation | Check appropriate manifest (root vs internal) | +| Dirty detection | Attribute dirty files to correct zone level | + +--- + +## Design Elegance + +The elegance comes from: + +1. **One grammar** for all zone paths +2. **One algorithm** (peel + recurse) for all resolution depths +3. **One filter** (`_internal` everywhere) for all source access +4. 
**Relative paths** in internal manifests (no duplication of host path) + +--- + +## Edge Cases + +### Zone path with consecutive `_internal` + +`//a/_internal/_internal/b` — This shouldn't happen by design (manifest would declare `_internal/b`, not `_internal`). Should error gracefully. + +### Missing internal manifest + +Error clearly: "Zone X does not have an internal manifest" + +### Zone references itself + +Not possible with the manifest structure. + +### Circular internal zones + +Not possible — each `_internal` is strictly nested deeper. + +### Dirty zone detection for internal zones + +Need to check if the internal zone's files are dirty. The host zone being dirty doesn't mean the internal zone is dirty. + +--- + +## Future Considerations: Access Control + +The design mentions that internal zones are "only readable from the zone that encloses them or their co-internal cousins." This access control could be enforced at: + +1. **Nix expression level** — The code that uses these builtins enforces who can call them +2. **Builtin level** — Add a "caller zone" context and validate access + +This is deferred to a future phase. 
diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 46393b79c5e..44a62a8c5de 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -24,7 +24,9 @@ #include "nix/fetchers/fetch-to-store.hh" #include "nix/fetchers/tarball.hh" #include "nix/fetchers/input-cache.hh" +#include "nix/fetchers/git-utils.hh" #include "nix/util/current-process.hh" +#include "nix/util/processes.hh" #include "nix/store/async-path-writer.hh" #include "nix/expr/parallel-eval.hh" @@ -330,6 +332,7 @@ EvalState::EvalState( , importResolutionCache(make_ref()) , fileEvalCache(make_ref()) , regexCache(makeRegexCache()) + , worldTreeShaCache(make_ref()) #if NIX_USE_BOEHMGC , baseEnvP(std::allocate_shared(traceable_allocator(), &mem.allocEnv(BASE_ENV_SIZE))) , baseEnv(**baseEnvP) @@ -422,6 +425,553 @@ void EvalState::allowAndSetStorePathString(const StorePath & storePath, Value & mkStorePathString(storePath, v); } +ref EvalState::getWorldRepo() const +{ + if (!worldRepo) { + auto gitDir = settings.tectonixGitDir.get(); + if (gitDir.empty()) + throw Error("--tectonix-git-dir must be specified to use tectonix builtins"); + + // Expand ~ to home directory + if (hasPrefix(gitDir, "~/")) + gitDir = getHome() + gitDir.substr(1); + + worldRepo = GitRepo::openRepo(std::filesystem::path(gitDir), {.bare = true, .odbOnly = true}); + } + return *worldRepo; +} + +ref EvalState::getWorldGitAccessor() const +{ + if (!worldGitAccessor) { + auto sha = settings.tectonixGitSha.get(); + if (sha.empty()) + throw Error("--tectonix-git-sha must be specified to use tectonix builtins"); + + auto repo = getWorldRepo(); + auto hash = Hash::parseNonSRIUnprefixed(sha, HashAlgorithm::SHA1); + + if (!repo->hasObject(hash)) + throw Error("tectonix-git-sha '%s' not found in repository", sha); + + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + worldGitAccessor = repo->getAccessor(hash, opts, "world"); + } + return *worldGitAccessor; +} + +std::optional> EvalState::getWorldCheckoutAccessor() const 
+{ + if (!isTectonixSourceAvailable()) + return std::nullopt; + + if (!worldCheckoutAccessor) { + auto checkoutPath = settings.tectonixCheckoutPath.get(); + // NOTE(review): getFSSourceAccessor() is called with no arguments, so `checkoutPath` above is unused and nothing in this function re-roots the accessor at the checkout; presumably callers prepend the checkout path themselves (confirm) + worldCheckoutAccessor = getFSSourceAccessor(); + } + return *worldCheckoutAccessor; +} + +bool EvalState::isTectonixSourceAvailable() const +{ + return !settings.tectonixCheckoutPath.get().empty(); +} + +// ============================================================================ +// Zone Path Parsing (for internal zone support) +// ============================================================================ + +namespace { + +/** + * Result of peeling a zone path into host and local components. + */ +struct PeeledZonePath { + std::optional hostPath; // nullopt for top-level zones + std::string localPath; // The path to look up in manifest + + bool isInternal() const { return hostPath.has_value(); } +}; + +/** + * Peel a zone path to extract the innermost internal zone layer. + * + * Uses rfind to find the rightmost "/_internal/" marker: + * - peel("//a/b/c") → {nullopt, "//a/b/c"} — top-level + * - peel("//a/b/_internal/c") → {"//a/b", "c"} — one level of nesting + * - peel("//a/_internal/b/_internal/c") → {"//a/_internal/b", "c"} — recursive host + */ +PeeledZonePath peelZonePath(std::string_view path) { + constexpr std::string_view marker = "/_internal/"; + auto pos = path.rfind(marker); + + if (pos == std::string_view::npos) { + return {.hostPath = std::nullopt, .localPath = std::string(path)}; + } + + return { + .hostPath = std::string(path.substr(0, pos)), + .localPath = std::string(path.substr(pos + marker.size())) + }; +} + +// ============================================================================ +// Zone Filtering Accessor (filters _internal directories) +// ============================================================================ + +/** + * A filtering source accessor that hides `_internal` directories. 
+ * + * Every zone's source accessor filters out `_internal` directories at every level, + * ensuring that zones are hermetically sealed from their internal zones. + * + * Example: + * - `//a/b/c` sees everything EXCEPT any `_internal` subdirectories + * - `//a/b/c/_internal/d` sees everything EXCEPT any `_internal` subdirectories within it + */ +class ZoneFilteringAccessor : public FilteringSourceAccessor { +public: + ZoneFilteringAccessor(ref next) + : FilteringSourceAccessor(SourcePath(next), makeNotAllowedError) + { + } + +private: + static RestrictedPathError makeNotAllowedError(const CanonPath & path) { + return RestrictedPathError("'%s' is hidden (inside _internal)", path); + } + + bool isAllowed(const CanonPath & path) override { + // Deny the path as soon as any of its components equals "_internal" + for (auto & component : path) { + if (component == "_internal") + return false; + } + return true; + } +}; + +} // anonymous namespace + +// ============================================================================ +// World Tree SHA Resolution (with internal zone support) +// ============================================================================ + +/** + * Get the git tree SHA for a world zone path. Paths containing "/_internal/" resolve their host zone recursively, then descend through `_internal` and each localPath segment. + * Top-level paths are walked component by component from the commit's root tree, storing the SHA of every prefix in worldTreeShaCache. + */ +Hash EvalState::getWorldTreeSha(std::string_view worldPath) const +{ + auto peeled = peelZonePath(worldPath); + + if (peeled.isInternal()) { + // Internal zone: resolve host first (recursive!) 
+ auto hostTreeSha = getWorldTreeSha(*peeled.hostPath); + auto repo = getWorldRepo(); + + // Navigate: hostTree -> _internal -> localPath + auto internalTreeSha = repo->getSubtreeSha(hostTreeSha, "_internal"); + + // Walk through localPath segments, skipping empty ones (this branch neither reads nor fills worldTreeShaCache) + Hash currentSha = internalTreeSha; + for (auto & segment : tokenizeString>(peeled.localPath, "/")) { + if (segment.empty()) continue; + currentSha = repo->getSubtreeSha(currentSha, segment); + } + + return currentSha; + } + + // Top-level zone: walk down from the commit's root tree + // Normalize path (remove leading //) + std::string path(worldPath); + if (hasPrefix(path, "//")) + path = path.substr(2); + + // Check cache first + if (auto cached = getConcurrent(*worldTreeShaCache, path)) + return *cached; + + // Compute by walking from root + auto repo = getWorldRepo(); + auto sha = settings.tectonixGitSha.get(); + auto commitSha = Hash::parseNonSRIUnprefixed(sha, HashAlgorithm::SHA1); + + // Get the root tree SHA from the commit + auto rootTreeSha = repo->getCommitTree(commitSha); + + // Walk path components, caching intermediate results + Hash currentSha = rootTreeSha; + std::string currentPath; + + // Create an accessor for path validation + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + auto accessor = repo->getAccessor(commitSha, opts, "world-tree"); + + for (auto & component : tokenizeString>(path, "/")) { + if (component.empty()) continue; + + std::string nextPath = currentPath.empty() ? 
component : currentPath + "/" + component; + + // Check if this level is cached + if (auto cached = getConcurrent(*worldTreeShaCache, nextPath)) { + currentSha = *cached; + currentPath = nextPath; + continue; + } + + // Not cached: check through the accessor that this prefix exists and is a directory + auto fullPath = CanonPath("/" + nextPath); + auto stat = accessor->maybeLstat(fullPath); + + if (!stat || stat->type != SourceAccessor::Type::tDirectory) + throw Error("path '%s' does not exist or is not a directory in world", nextPath); + + // Get the tree SHA for this subtree + currentSha = repo->getSubtreeSha(currentSha, component); + + // Cache this level + worldTreeShaCache->try_emplace(nextPath, currentSha); + currentPath = nextPath; + } + + return currentSha; +} + +const std::set & EvalState::getTectonixSparseCheckoutRoots() const +{ + if (tectonixSparseCheckoutRoots) + return *tectonixSparseCheckoutRoots; + + std::set roots; + + if (isTectonixSourceAvailable()) { + auto checkoutPath = settings.tectonixCheckoutPath.get(); + + // Read .git to find the actual git directory + // It can be either a directory or a file containing "gitdir: " + auto dotGitPath = std::filesystem::path(checkoutPath) / ".git"; + std::filesystem::path gitDir; + + if (std::filesystem::is_directory(dotGitPath)) { + gitDir = dotGitPath; + } else if (std::filesystem::is_regular_file(dotGitPath)) { + auto gitdirContent = readFile(dotGitPath.string()); + // Parse "gitdir: \n" + if (hasPrefix(gitdirContent, "gitdir: ")) { + auto path = trim(gitdirContent.substr(8)); + gitDir = std::filesystem::path(path); + // Handle relative paths + if (gitDir.is_relative()) + gitDir = std::filesystem::path(checkoutPath) / gitDir; + } + } + + if (!gitDir.empty()) { + // Read sparse-checkout-roots + auto sparseRootsPath = gitDir / "info" / "sparse-checkout-roots"; + if (std::filesystem::exists(sparseRootsPath)) { + auto content = readFile(sparseRootsPath.string()); + for (auto & line : tokenizeString>(content, "\n")) { + auto trimmed = 
trim(line); + if (!trimmed.empty()) + roots.insert(std::string(trimmed)); + } + } + } + } + + tectonixSparseCheckoutRoots = std::move(roots); + return *tectonixSparseCheckoutRoots; +} + +const std::map & EvalState::getTectonixDirtyZones() const +{ + if (tectonixDirtyZones) + return *tectonixDirtyZones; + + std::map dirtyZones; + + if (isTectonixSourceAvailable()) { + // Get sparse checkout roots (zone IDs) + auto & sparseRoots = getTectonixSparseCheckoutRoots(); + + if (!sparseRoots.empty()) { + // Read manifest to get zone ID -> zone path mapping + auto gitDir = settings.tectonixGitDir.get(); + if (hasPrefix(gitDir, "~/")) + gitDir = getHome() + gitDir.substr(1); + + auto sha = settings.tectonixGitSha.get(); + if (!gitDir.empty() && !sha.empty()) { + auto repo = getWorldRepo(); + auto hash = Hash::parseNonSRIUnprefixed(sha, HashAlgorithm::SHA1); + + if (repo->hasObject(hash)) { + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + auto accessor = repo->getAccessor(hash, opts, "world"); + + auto manifestPath = CanonPath("/.meta/manifest.json"); + if (accessor->pathExists(manifestPath)) { + auto manifestContent = accessor->readFile(manifestPath); + auto manifest = nlohmann::json::parse(manifestContent); + + // Build map of zone ID -> zone path for sparse roots only + std::map zoneIdToPath; + for (auto & [path, value] : manifest.items()) { + auto & id = value.at("id").get_ref(); + if (sparseRoots.count(id)) + zoneIdToPath[id] = path; + } + + // Initialize all sparse-checked-out zones as not dirty + for (auto & [zoneId, zonePath] : zoneIdToPath) { + dirtyZones[zonePath] = false; + } + + // Helper function to recursively discover and register internal zones + std::function discoverInternalZones; + discoverInternalZones = [&](const std::string & hostZonePath, const Hash & hostTreeSha) { + // Check if host zone has an internal manifest + auto hostAccessor = repo->getAccessor(hostTreeSha, opts, "host"); + auto internalManifestPath = 
CanonPath("_internal/manifest.json"); + + if (!hostAccessor->pathExists(internalManifestPath)) + return; + + auto internalManifestContent = hostAccessor->readFile(internalManifestPath); + auto internalManifest = nlohmann::json::parse(internalManifestContent); + + // Register each internal zone + for (auto & [localPath, value] : internalManifest.items()) { + std::string internalZonePath = hostZonePath + "/_internal/" + localPath; + dirtyZones[internalZonePath] = false; + + // Recursively discover nested internal zones + try { + auto internalTreeSha = repo->getSubtreeSha(hostTreeSha, "_internal"); + for (auto & segment : tokenizeString>(localPath, "/")) { + if (!segment.empty()) + internalTreeSha = repo->getSubtreeSha(internalTreeSha, segment); + } + discoverInternalZones(internalZonePath, internalTreeSha); + } catch (...) { + // Internal zone tree not found, skip recursion + } + } + }; + + // Discover internal zones for each top-level zone + for (auto & [zoneId, zonePath] : zoneIdToPath) { + try { + auto zoneTreeSha = getWorldTreeSha(zonePath); + discoverInternalZones(zonePath, zoneTreeSha); + } catch (...) 
{ + // Zone tree not found, skip internal zone discovery + } + } + + // Get dirty files via git status (avoids libgit2 reftables issue) + auto checkoutPath = settings.tectonixCheckoutPath.get(); + auto gitStatusOutput = runProgram("git", true, {"-C", checkoutPath, "status", "--porcelain"}); + + for (auto & line : tokenizeString>(gitStatusOutput, "\n")) { + if (line.size() < 4) continue; // Skip empty/malformed lines + // Format: "XY filename" where XY is 2-char status + auto filePath = "/" + std::string(line.substr(3)); + + // Find the most specific zone this file belongs to + // (internal zones are more specific than their host zones) + std::string bestMatch; + for (auto & [zonePath, dirty] : dirtyZones) { + // Normalize zone path for comparison (remove leading //) + std::string normalizedZonePath = zonePath; + if (hasPrefix(normalizedZonePath, "//")) + normalizedZonePath = normalizedZonePath.substr(1); // keep one / + + if (hasPrefix(filePath, normalizedZonePath + "/") || filePath == normalizedZonePath) { + // Prefer longer (more specific) matches + if (normalizedZonePath.size() > bestMatch.size()) { + bestMatch = zonePath; + } + } + } + + if (!bestMatch.empty()) { + dirtyZones[bestMatch] = true; + } + } + } + } + } + } + } + + tectonixDirtyZones = std::move(dirtyZones); + return *tectonixDirtyZones; +} + +StorePath EvalState::getZoneStorePath(std::string_view zonePath) +{ + // Normalize path + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + // Check dirty status + bool isDirty = false; + if (isTectonixSourceAvailable()) { + auto & dirtyZones = getTectonixDirtyZones(); + auto it = dirtyZones.find(std::string(zonePath)); + isDirty = it != dirtyZones.end() && it->second; + } + + if (isDirty) { + // EXTENSION POINT: For now, always eager from checkout + return getZoneFromCheckout(zonePath); + } + + // Clean zone: get tree SHA + auto treeSha = getWorldTreeSha(zonePath); + + if (!settings.lazyTrees) { + // Eager mode: immediate 
copy from git ODB + auto repo = getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto rawAccessor = repo->getAccessor(treeSha, opts, "zone"); + + // Wrap with _internal filter to hide internal zones + auto accessor = make_ref(rawAccessor); + + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + auto storePath = fetchToStore( + fetchSettings, *store, + SourcePath(accessor, CanonPath::root), + FetchMode::Copy, name); + + allowPath(storePath); + return storePath; + } + + // Lazy mode: mount by tree SHA + return mountZoneByTreeSha(treeSha, zonePath); +} + +StorePath EvalState::mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath) +{ + // Check cache first (thread-safe) + { + auto cache = tectonixZoneCache_.readLock(); + auto it = cache->find(treeSha); + if (it != cache->end()) { + debug("zone cache hit for tree %s", treeSha.gitRev()); + return it->second; + } + } + + // Not cached: create accessor and mount + auto repo = getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = true, .smudgeLfs = false}; + auto rawAccessor = repo->getAccessor(treeSha, opts, "zone"); + + // Wrap with _internal filter to hide internal zones from this zone's view + auto accessor = make_ref(rawAccessor); + + // Generate name from zone path + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + + // Create virtual store path + auto storePath = StorePath::random(name); + allowPath(storePath); + + // Mount the filtered accessor at this path + storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); + + // Cache it (thread-safe) + { + auto cache = tectonixZoneCache_.lock(); + auto [it, inserted] = cache->try_emplace(treeSha, storePath); + if (!inserted) { + // Another thread beat us, use their path + return it->second; + } + } + + debug("mounted zone %s (tree %s) at %s", + zonePath, treeSha.gitRev(), 
store->printStorePath(storePath)); + + return storePath; +} + +StorePath EvalState::getZoneFromCheckout(std::string_view zonePath) +{ + std::string zone(zonePath); + if (hasPrefix(zone, "//")) + zone = zone.substr(2); + + std::string name = "zone-" + replaceStrings(zone, "/", "-"); + auto checkoutPath = settings.tectonixCheckoutPath.get(); + auto fullPath = std::filesystem::path(checkoutPath) / zone; + + if (!settings.lazyTrees) { + // Eager mode: immediate copy from checkout + // Create an accessor rooted at the zone directory + auto rawAccessor = makeFSSourceAccessor(fullPath); + + // Wrap with _internal filter to hide internal zones + auto accessor = make_ref(rawAccessor); + + auto storePath = fetchToStore( + fetchSettings, *store, + SourcePath(accessor, CanonPath::root), + FetchMode::Copy, name); + + allowPath(storePath); + return storePath; + } + + // Lazy mode: check cache first (thread-safe) + { + auto cache = tectonixCheckoutZoneCache_.readLock(); + auto it = cache->find(std::string(zonePath)); + if (it != cache->end()) { + debug("checkout zone cache hit for %s", zonePath); + return it->second; + } + } + + // Not cached: create accessor rooted at zone directory and mount + auto rawAccessor = makeFSSourceAccessor(fullPath); + + // Wrap with _internal filter to hide internal zones from this zone's view + auto accessor = make_ref(rawAccessor); + + // Create virtual store path + auto storePath = StorePath::random(name); + allowPath(storePath); + + // Mount the filtered accessor at this path + storeFS->mount(CanonPath(store->printStorePath(storePath)), accessor); + + // Cache it (thread-safe) + { + auto cache = tectonixCheckoutZoneCache_.lock(); + auto [it, inserted] = cache->try_emplace(std::string(zonePath), storePath); + if (!inserted) { + // Another thread beat us, use their path + return it->second; + } + } + + debug("mounted checkout zone %s at %s", zonePath, store->printStorePath(storePath)); + return storePath; +} + inline static bool 
isJustSchemePrefix(std::string_view prefix) { return !prefix.empty() && prefix[prefix.size() - 1] == ':' diff --git a/src/libexpr/include/nix/expr/eval-settings.hh b/src/libexpr/include/nix/expr/eval-settings.hh index f367541ec2f..58138e86fbb 100644 --- a/src/libexpr/include/nix/expr/eval-settings.hh +++ b/src/libexpr/include/nix/expr/eval-settings.hh @@ -399,6 +399,41 @@ struct EvalSettings : Config Note that enabling the debugger (`--debugger`) disables multi-threaded evaluation. )"}; + + Setting tectonixGitDir{ + this, + "", + "tectonix-git-dir", + R"( + Path to the git directory for tectonix builtins (e.g., `~/world/git`). + + This enables the tectonix builtins (`builtins.unsafeTectonixInternalTreeSha`, `builtins.unsafeTectonixInternalTree`, + `builtins.unsafeTectonixInternalFile`, `builtins.unsafeTectonixInternalZoneSrc`, `builtins.unsafeTectonixInternalDir`) which provide + native access to files from a git repository during Nix evaluation. + )"}; + + Setting tectonixGitSha{ + this, + "", + "tectonix-git-sha", + R"( + Git commit SHA to use for tectonix builtins. + + This specifies the commit to read from when using tectonix builtins. + Typically set to HEAD of the repository. + )"}; + + Setting tectonixCheckoutPath{ + this, + "", + "tectonix-checkout-path", + R"( + Path to checkout directory for source-available mode. + + When set, uncommitted files in the checkout are preferred over git content + for tectonix builtins. This enables local development workflows where changes + are visible before committing. 
+ )"}; }; /** diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index c9cfb1a573b..93875abda2a 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -26,6 +26,7 @@ #include #include +#include #include namespace nix { @@ -52,6 +53,7 @@ enum RepairFlag : bool; struct MemorySourceAccessor; struct MountedSourceAccessor; struct AsyncPathWriter; +struct GitRepo; namespace eval_cache { class EvalCache; @@ -513,6 +515,48 @@ private: */ const ref regexCache; + /** Lazy-initialized git repository for world builtins */ + mutable std::optional> worldRepo; + + /** Lazy-initialized source accessor for world git content */ + mutable std::optional> worldGitAccessor; + + /** Lazy-initialized source accessor for world checkout (source-available mode) */ + mutable std::optional> worldCheckoutAccessor; + + /** Cache: world path → tree SHA (lazy computed, cached at each path level) */ + const ref> worldTreeShaCache; + + /** Lazy-initialized set of zone IDs in sparse checkout */ + mutable std::optional> tectonixSparseCheckoutRoots; + + /** Lazy-initialized map of zone path → dirty status (only for sparse-checked-out zones) */ + mutable std::optional> tectonixDirtyZones; + + /** + * Cache tree SHA → virtual store path for lazy zone mounts. + * Thread-safe for eval-cores > 1. + */ + mutable SharedSync> tectonixZoneCache_; + + /** + * Cache zone path → virtual store path for lazy checkout zone mounts. + * Thread-safe for eval-cores > 1. + */ + mutable SharedSync> tectonixCheckoutZoneCache_; + + /** + * Mount a zone by tree SHA, returning a (potentially virtual) store path. + * Caches by tree SHA for deduplication across world revisions. + */ + StorePath mountZoneByTreeSha(const Hash & treeSha, std::string_view zonePath); + + /** + * Get zone store path from checkout (for dirty zones). + * With lazy-trees enabled, mounts lazily and caches by zone path. 
+ */ + StorePath getZoneFromCheckout(std::string_view zonePath); + public: /** @@ -544,6 +588,36 @@ public: return lookupPath; } + /** Get the world git repository, initializing lazily */ + ref getWorldRepo() const; + + /** Get accessor for world git content at worldSha */ + ref getWorldGitAccessor() const; + + /** Get accessor for world checkout (only in source-available mode) */ + std::optional> getWorldCheckoutAccessor() const; + + /** Get tree SHA for a world path, with lazy caching */ + Hash getWorldTreeSha(std::string_view worldPath) const; + + /** Check if we're in source-available mode */ + bool isTectonixSourceAvailable() const; + + /** Get set of zone IDs in sparse checkout (source-available mode only) */ + const std::set & getTectonixSparseCheckoutRoots() const; + + /** Get map of zone path → dirty status (only for sparse-checked-out zones) */ + const std::map & getTectonixDirtyZones() const; + + /** + * Get a zone's store path, handling dirty detection and lazy mounting. + * + * For clean zones with lazy-trees enabled: mounts accessor lazily + * For dirty zones: currently eager-copies from checkout (extension point) + * For lazy-trees disabled: eager-copies from git + */ + StorePath getZoneStorePath(std::string_view zonePath); + /** * Return a `SourcePath` that refers to `path` in the root * filesystem. 
diff --git a/src/libexpr/primops/meson.build b/src/libexpr/primops/meson.build index b8abc6409af..5d948a49c3f 100644 --- a/src/libexpr/primops/meson.build +++ b/src/libexpr/primops/meson.build @@ -9,4 +9,5 @@ sources += files( 'fetchMercurial.cc', 'fetchTree.cc', 'fromTOML.cc', + 'tectonix.cc', ) diff --git a/src/libexpr/primops/tectonix.cc b/src/libexpr/primops/tectonix.cc new file mode 100644 index 00000000000..765611151ac --- /dev/null +++ b/src/libexpr/primops/tectonix.cc @@ -0,0 +1,438 @@ +#include "nix/expr/primops.hh" +#include "nix/expr/eval-inline.hh" +#include "nix/expr/eval-settings.hh" +#include "nix/fetchers/git-utils.hh" +#include "nix/store/store-api.hh" +#include "nix/fetchers/fetch-to-store.hh" + +#include + +namespace nix { + +// ============================================================================ +// Zone Path Parsing Infrastructure +// ============================================================================ + +/** + * Result of peeling a zone path into host and local components. + * + * For top-level zones like "//a/b/c", hostPath is nullopt and localPath is "//a/b/c". + * For internal zones like "//a/b/_internal/c", hostPath is "//a/b" and localPath is "c". + * For nested internal zones like "//a/_internal/b/_internal/c", hostPath is "//a/_internal/b" and localPath is "c". + */ +struct PeeledZonePath { + std::optional hostPath; // nullopt for top-level zones + std::string localPath; // The path to look up in manifest + + bool isInternal() const { return hostPath.has_value(); } +}; + +/** + * Peel a zone path to extract the innermost internal zone layer. 
+ * + * Uses rfind to find the rightmost "/_internal/" marker: + * - peel("//a/b/c") → {nullopt, "//a/b/c"} — top-level + * - peel("//a/b/_internal/c") → {"//a/b", "c"} — one level of nesting + * - peel("//a/_internal/b/_internal/c") → {"//a/_internal/b", "c"} — recursive host + */ +static PeeledZonePath peelZonePath(std::string_view path) { + constexpr std::string_view marker = "/_internal/"; + auto pos = path.rfind(marker); + + if (pos == std::string_view::npos) { + return {.hostPath = std::nullopt, .localPath = std::string(path)}; + } + + return { + .hostPath = std::string(path.substr(0, pos)), + .localPath = std::string(path.substr(pos + marker.size())) + }; +} + +// ============================================================================ +// Internal Manifest Reading +// ============================================================================ + +/** + * Read the internal manifest from a host zone's tree. + * + * Internal manifests are located at `_internal/manifest.json` within the host zone + * and contain relative paths (no // prefix) mapping to zone IDs. 
+ * + * Example internal manifest: + * { + * "helpers": {"id": "W-def000"}, + * "test-utils": {"id": "W-def001"}, + * "deeply/nested/thing": {"id": "W-def002"} + * } + * + * @param state The evaluation state + * @param hostTreeSha The tree SHA of the host zone + * @return The parsed internal manifest JSON, or nullopt if no internal manifest exists + */ +static std::optional readInternalManifest( + EvalState & state, + const Hash & hostTreeSha) +{ + auto repo = state.getWorldRepo(); + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + auto accessor = repo->getAccessor(hostTreeSha, opts, "host"); + + auto manifestPath = CanonPath("_internal/manifest.json"); + if (!accessor->pathExists(manifestPath)) + return std::nullopt; + + return nlohmann::json::parse(accessor->readFile(manifestPath)); +} + +// Helper to read the manifest JSON content +static std::string readManifestContent(EvalState & state, const PosIdx pos) +{ + auto fullPath = CanonPath("/.meta/manifest.json"); + + // In source-available mode, check checkout first + if (state.isTectonixSourceAvailable()) { + auto checkoutAccessor = state.getWorldCheckoutAccessor(); + if (checkoutAccessor) { + auto checkoutPath = state.settings.tectonixCheckoutPath.get(); + auto checkoutFullPath = CanonPath(checkoutPath + fullPath.abs()); + if ((*checkoutAccessor)->pathExists(checkoutFullPath)) { + return (*checkoutAccessor)->readFile(checkoutFullPath); + } + } + } + + // Fall back to git + auto accessor = state.getWorldGitAccessor(); + if (!accessor->pathExists(fullPath)) + state.error("manifest.json does not exist at //.meta/manifest.json in world") + .atPos(pos).debugThrow(); + + return accessor->readFile(fullPath); +} + +// ============================================================================ +// builtins.worldManifest +// Returns path -> zoneId mapping from //.meta/manifest.json +// ============================================================================ +static void 
prim_worldManifest(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto content = readManifestContent(state, pos); + auto json = nlohmann::json::parse(content); + + auto attrs = state.buildBindings(json.size()); + for (auto & [path, value] : json.items()) { + auto & id = value.at("id"); + attrs.alloc(state.symbols.create(path)).mkString(id.get(), state.mem); + } + v.mkAttrs(attrs); +} + +static RegisterPrimOp primop_worldManifest({ + .name = "__unsafeTectonixInternalManifest", + .args = {}, + .doc = R"( + Get the world manifest as a Nix attrset mapping zone paths to zone IDs. + + Example: `builtins.unsafeTectonixInternalManifest."//areas/tools/dev"` returns `"W-123456"`. + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. + )", + .fun = prim_worldManifest, +}); + +// ============================================================================ +// builtins.worldManifestInverted +// Returns zoneId -> path mapping (inverse of worldManifest) +// ============================================================================ +static void prim_worldManifestInverted(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto content = readManifestContent(state, pos); + auto json = nlohmann::json::parse(content); + + auto attrs = state.buildBindings(json.size()); + for (auto & [path, value] : json.items()) { + auto & id = value.at("id"); + attrs.alloc(state.symbols.create(id.get())).mkString(path, state.mem); + } + v.mkAttrs(attrs); +} + +static RegisterPrimOp primop_worldManifestInverted({ + .name = "__unsafeTectonixInternalManifestInverted", + .args = {}, + .doc = R"( + Get the inverted world manifest as a Nix attrset mapping zone IDs to zone paths. + + Example: `builtins.unsafeTectonixInternalManifestInverted."W-123456"` returns `"//areas/tools/dev"`. + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. 
+ )", + .fun = prim_worldManifestInverted, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalTreeSha worldPath +// Returns the git tree SHA for a world path +// ============================================================================ +static void prim_unsafeTectonixInternalTreeSha(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto worldPath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'worldPath' argument to builtins.unsafeTectonixInternalTreeSha"); + + auto sha = state.getWorldTreeSha(worldPath); + v.mkString(sha.gitRev(), state.mem); +} + +static RegisterPrimOp primop_unsafeTectonixInternalTreeSha({ + .name = "__unsafeTectonixInternalTreeSha", + .args = {"worldPath"}, + .doc = R"( + Get the git tree SHA for a path in the world repository. + + Example: `builtins.unsafeTectonixInternalTreeSha "//areas/tools/tec"` returns the tree SHA + for that zone. + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. 
+ )", + .fun = prim_unsafeTectonixInternalTreeSha, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalTree treeSha +// Returns a store path containing the tree contents +// ============================================================================ +static void prim_unsafeTectonixInternalTree(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto treeSha = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'treeSha' argument to builtins.unsafeTectonixInternalTree"); + + auto repo = state.getWorldRepo(); + auto hash = Hash::parseNonSRIUnprefixed(treeSha, HashAlgorithm::SHA1); + + if (!repo->hasObject(hash)) + state.error("tree SHA '%s' not found in world repository", treeSha) + .atPos(pos).debugThrow(); + + GitAccessorOptions opts{.exportIgnore = false, .smudgeLfs = false}; + auto accessor = repo->getAccessor(hash, opts, "world-tree"); + + auto storePath = fetchToStore( + state.fetchSettings, + *state.store, + SourcePath(accessor, CanonPath::root), + FetchMode::Copy, + "world-tree-" + std::string(treeSha).substr(0, 8)); + + state.allowAndSetStorePathString(storePath, v); +} + +static RegisterPrimOp primop_unsafeTectonixInternalTree({ + .name = "__unsafeTectonixInternalTree", + .args = {"treeSha"}, + .doc = R"( + Fetch a git tree by SHA from the world repository and return it as a store path. + + Example: `builtins.unsafeTectonixInternalTree "abc123..."` returns `/nix/store/...-world-tree-abc123`. + + Requires `--tectonix-git-dir` to be set. + )", + .fun = prim_unsafeTectonixInternalTree, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalZoneSrc zonePath +// Returns a store path containing the zone source +// With lazy-trees enabled, returns a virtual store path that is only +// materialized when used as a derivation input. 
+// ============================================================================ +static void prim_unsafeTectonixInternalZoneSrc(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto zonePath = state.forceStringNoCtx(*args[0], pos, + "while evaluating the 'zonePath' argument to builtins.unsafeTectonixInternalZoneSrc"); + + auto storePath = state.getZoneStorePath(zonePath); + state.allowAndSetStorePathString(storePath, v); +} + +static RegisterPrimOp primop_unsafeTectonixInternalZoneSrc({ + .name = "__unsafeTectonixInternalZoneSrc", + .args = {"zonePath"}, + .doc = R"( + Get the source of a zone as a store path. + + With `lazy-trees = true`, returns a virtual store path that is only + materialized when used as a derivation input (devirtualized). + + In source-available mode with uncommitted changes, uses checkout content + (always eager for dirty zones). + + Example: `builtins.unsafeTectonixInternalZoneSrc "//areas/tools/tec"` + + Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set. + )", + .fun = prim_unsafeTectonixInternalZoneSrc, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalSparseCheckoutRoots +// Returns list of zone IDs in sparse checkout +// ============================================================================ +static void prim_unsafeTectonixInternalSparseCheckoutRoots(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto & roots = state.getTectonixSparseCheckoutRoots(); + + auto list = state.buildList(roots.size()); + size_t i = 0; + for (const auto & root : roots) { + (list[i++] = state.allocValue())->mkString(root, state.mem); + } + v.mkList(list); +} + +static RegisterPrimOp primop_unsafeTectonixInternalSparseCheckoutRoots({ + .name = "__unsafeTectonixInternalSparseCheckoutRoots", + .args = {}, + .doc = R"( + Get the list of zone IDs that are in the sparse checkout. 
+ + Returns an empty list if not in source-available mode or if no + sparse-checkout-roots file exists. + + Example: `builtins.unsafeTectonixInternalSparseCheckoutRoots` returns `["W-000000" "W-1337af" ...]`. + + Requires `--tectonix-checkout-path` to be set. + )", + .fun = prim_unsafeTectonixInternalSparseCheckoutRoots, +}); + +// ============================================================================ +// builtins.unsafeTectonixInternalDirtyZones +// Returns map of zone paths to dirty status +// ============================================================================ +static void prim_unsafeTectonixInternalDirtyZones(EvalState & state, const PosIdx pos, Value ** args, Value & v) +{ + auto & dirtyZones = state.getTectonixDirtyZones(); + + auto attrs = state.buildBindings(dirtyZones.size()); + for (const auto & [zonePath, dirty] : dirtyZones) { + attrs.alloc(state.symbols.create(zonePath)).mkBool(dirty); + } + v.mkAttrs(attrs); +} + +static RegisterPrimOp primop_unsafeTectonixInternalDirtyZones({ + .name = "__unsafeTectonixInternalDirtyZones", + .args = {}, + .doc = R"( + Get the dirty status of zones in the sparse checkout. + + Returns an attrset mapping zone paths to booleans indicating whether + the zone has uncommitted changes. + + Only includes zones that are in the sparse checkout. + + Example: `builtins.unsafeTectonixInternalDirtyZones."//areas/tools/dev"` returns `true` or `false`. + + Requires `--tectonix-checkout-path` to be set. 
+    )",
+    .fun = prim_unsafeTectonixInternalDirtyZones,
+});
+
+// ============================================================================
+// builtins.unsafeTectonixInternalZone zonePath
+// Returns an attrset with zone info (flake-like interface)
+// ============================================================================
+static void prim_unsafeTectonixInternalZone(EvalState & state, const PosIdx pos, Value ** args, Value & v)
+{
+    auto zonePath = state.forceStringNoCtx(*args[0], pos,
+        "while evaluating the 'zonePath' argument to builtins.unsafeTectonixInternalZone");
+
+    // Peel the zone path to determine if it's top-level or internal
+    auto peeled = peelZonePath(zonePath);
+
+    // Validate that zonePath exists in the appropriate manifest; every failure below is reported at `pos`
+    if (!peeled.isInternal()) {
+        // Top-level zone: it must be an exact key of the root manifest
+        auto content = readManifestContent(state, pos);
+        auto manifest = nlohmann::json::parse(content);
+        if (!manifest.contains(std::string(zonePath)))
+            state.error("'%s' is not a zone root (must be an exact path from the manifest)", zonePath).atPos(pos).debugThrow();
+    } else {
+        // Internal zone: resolve host zone and check its internal manifest
+        auto hostTreeSha = state.getWorldTreeSha(*peeled.hostPath);
+        auto internalManifest = readInternalManifest(state, hostTreeSha);
+
+        if (!internalManifest)
+            state.error("zone '%s' has no internal manifest", *peeled.hostPath).atPos(pos).debugThrow();
+
+        if (!internalManifest->contains(peeled.localPath))
+            state.error("'%s' is not an internal zone of '%s'",
+                peeled.localPath, *peeled.hostPath).atPos(pos).debugThrow();
+    }
+
+    // Get tree SHA (handles recursion internally)
+    auto treeSha = state.getWorldTreeSha(zonePath);
+
+    // Check dirty status: stays false without a source checkout or when the zone is not in the dirty map
+    bool isDirty = false;
+    if (state.isTectonixSourceAvailable()) {
+        auto & dirtyZones = state.getTectonixDirtyZones();
+        auto it = dirtyZones.find(std::string(zonePath));
+        isDirty = it != dirtyZones.end() && it->second;
+    }
+
+    auto storePath = state.getZoneStorePath(zonePath);
+    auto storePathStr = state.store->printStorePath(storePath);
+
+    // Build result attrset (like fetchTree); the capacity must match the five attributes set below
+    auto attrs = state.buildBindings(5);
+
+    // outPath: string with context (for use as derivation src)
+    attrs.alloc("outPath").mkString(storePathStr, {
+        NixStringContextElem::Opaque{storePath}
+    }, state.mem);
+
+    // root: path value (for reading files without devirtualization)
+    attrs.alloc("root").mkPath(
+        state.rootPath(CanonPath(storePathStr)), state.mem);
+
+    attrs.alloc("treeSha").mkString(treeSha.gitRev(), state.mem);
+    attrs.alloc("zonePath").mkString(zonePath, state.mem);
+    attrs.alloc("dirty").mkBool(isDirty);
+
+    v.mkAttrs(attrs);
+}
+
+static RegisterPrimOp primop_unsafeTectonixInternalZone({
+    .name = "__unsafeTectonixInternalZone",
+    .args = {"zonePath"},
+    .doc = R"(
+      Get a zone from the world repository.
+
+      Returns an attrset with:
+      - outPath: Store path string with context (for use as derivation src)
+      - root: Path value for reading files (no devirtualization)
+      - treeSha: Git tree SHA for this zone
+      - zonePath: The zone path argument
+      - dirty: Whether the zone has uncommitted changes
+
+      With `lazy-trees = true`, the zone is mounted lazily. Use `root` to
+      read files without triggering a copy to the store:
+
+          let zone = builtins.unsafeTectonixInternalZone "//areas/tools/tec";
+          in import (zone.root + "/zone.nix")
+
+      Use `outPath` as derivation src (triggers copy at build time):
+
+          mkDerivation { src = zone.outPath; }
+
+      Requires `--tectonix-git-dir` and `--tectonix-git-sha` to be set.
+    )",
+    .fun = prim_unsafeTectonixInternalZone,
+});
+
+} // namespace nix
diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc
index f21313a1040..3028a76eeba 100644
--- a/src/libfetchers/git-utils.cc
+++ b/src/libfetchers/git-utils.cc
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -108,6 +109,15 @@ static void initLibGit2()
     std::call_once(initialized, []() {
         if (git_libgit2_init() < 0)
             throw Error("initialising libgit2: %s", git_error_last()->message);
+
+        // Register support for additional git extensions.
+        // This allows opening repos with extensions that libgit2 doesn't natively support,
+        // as long as we don't actually need the extension's functionality.
+        // "refstorage" is used by reftables - we can ignore it since we only access objects by SHA.
+        const char * extensions[] = { "refstorage" };
+        // The option is variadic and reads the length as a size_t, so pass a size_t rather than an int literal.
+        if (git_libgit2_opts(GIT_OPT_SET_EXTENSIONS, extensions, sizeof(extensions) / sizeof(extensions[0])) < 0)
+            throw Error("registering libgit2 extensions: %s", git_error_last()->message);
     });
 }
 
@@ -265,6 +275,26 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this
     {
         initLibGit2();
 
+        if (options.odbOnly) {
+            /* Open only the object database, bypassing full repository validation.
+               This is useful for repositories with unsupported extensions like reftables.
+               We create a fake repository wrapping the ODB for API compatibility. */
+
+            git_odb * odb = nullptr;
+            if (git_odb_open(&odb, (path / "objects").string().c_str()))
+                throw Error("opening Git object database %s: %s", path / "objects", git_error_last()->message);
+
+            /* The wrapping repository takes its own reference to the ODB, so
+               ours must still be released, on success and on failure alike. */
+            Finally freeOdb([&]() { git_odb_free(odb); });
+
+            if (git_repository_wrap_odb(Setter(repo), odb))
+                throw Error("wrapping Git object database: %s", git_error_last()->message);
+
+            // No mempack backend needed for read-only ODB access
+            return;
+        }
+
         initRepoAtomically(path, options);
         if (git_repository_open(Setter(repo), path.string().c_str()))
             throw Error("opening Git repository %s: %s", path, git_error_last()->message);
@@ -595,6 +625,70 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this
         return true;
     }
 
+    Hash getSubtreeSha(const Hash & treeSha, const std::string & entryName) override
+    {
+        git_tree * tree = nullptr;
+        auto oid = hashToOID(treeSha);
+
+        if (git_tree_lookup(&tree, *this, &oid))
+            throw Error("looking up tree %s: %s", treeSha.gitRev(), git_error_last()->message);
+
+        Finally freeTree([&]() { git_tree_free(tree); });
+
+        // The entry is owned by `tree`; it is only used while the tree is still alive.
+        auto entry = git_tree_entry_byname(tree, entryName.c_str());
+        if (!entry)
+            throw Error("entry '%s' not found in tree %s", entryName, treeSha.gitRev());
+
+        return toHash(*git_tree_entry_id(entry));
+    }
+
+    Hash getCommitTree(const Hash & commitSha) override
+    {
+        auto oid = hashToOID(commitSha);
+        auto obj = lookupObject(*this, oid);
+        // Peel the looked-up object down to a tree and report that tree's ID.
+        auto tree = peelObject(obj.get(), GIT_OBJECT_TREE);
+        return toHash(*git_object_id(tree.get()));
+    }
+
+    std::set<CanonPath> getDirtyFilesAgainstTree(const Hash & commitSha, const std::filesystem::path & workdirPath) override
+    {
+        std::set<CanonPath> dirtyFiles;
+
+        // Set workdir on the repo. Note that this setting persists on `repo` after we return.
+        if (git_repository_set_workdir(repo.get(), workdirPath.string().c_str(), 0))
+            throw Error("setting workdir on repository: %s", git_error_last()->message);
+
+        // Get tree from commit
+        auto oid = hashToOID(commitSha);
+        auto obj = lookupObject(*this, oid);
+        auto tree = peelObject(obj.get(), GIT_OBJECT_TREE);
+
+        // Create diff between tree and workdir, using index for faster stat cache
+        git_diff * diff = nullptr;
+        git_diff_options opts = GIT_DIFF_OPTIONS_INIT;
+        opts.flags = GIT_DIFF_INCLUDE_UNTRACKED | GIT_DIFF_RECURSE_UNTRACKED_DIRS;
+
+        if (git_diff_tree_to_workdir_with_index(&diff, repo.get(), tree.get(), &opts))
+            throw Error("creating diff: %s", git_error_last()->message);
+
+        // Release the diff on every exit path, including when building or inserting a path throws.
+        Finally freeDiff([&]() { git_diff_free(diff); });
+
+        // Collect dirty file paths
+        size_t numDeltas = git_diff_num_deltas(diff);
+        for (size_t i = 0; i < numDeltas; i++) {
+            const git_diff_delta * delta = git_diff_get_delta(diff, i);
+            // Use new_file path for adds/modifies, old_file for deletes
+            const char * path = delta->new_file.path ? delta->new_file.path : delta->old_file.path;
+            if (path)
+                dirtyFiles.insert(CanonPath(path));
+        }
+
+        return dirtyFiles;
+    }
+
     /**
      * A 'GitSourceAccessor' with no regard for export-ignore.
      */
@@ -876,8 +970,19 @@ struct GitSourceAccessor : SourceAccessor
 
         for (size_t n = 0; n < count; ++n) {
             auto entry = git_tree_entry_byindex(tree.get(), n);
+            auto mode = git_tree_entry_filemode(entry);
+            // Entries with any other file mode keep an unset type.
+            DirEntry type;
+            if (mode == GIT_FILEMODE_TREE)
+                type = Type::tDirectory;
+            else if (mode == GIT_FILEMODE_BLOB || mode == GIT_FILEMODE_BLOB_EXECUTABLE)
+                type = Type::tRegular;
+            else if (mode == GIT_FILEMODE_LINK)
+                type = Type::tSymlink;
+            else if (mode == GIT_FILEMODE_COMMIT)
+                type = Type::tDirectory; // submodule
             // FIXME: add to cache
-            res.emplace(std::string(git_tree_entry_name(entry)), DirEntry{});
+            res.emplace(std::string(git_tree_entry_name(entry)), type);
         }
 
         return res;
diff --git a/src/libfetchers/include/nix/fetchers/git-utils.hh b/src/libfetchers/include/nix/fetchers/git-utils.hh
index eada8745c3e..ca6ddd39f5f 100644
--- a/src/libfetchers/include/nix/fetchers/git-utils.hh
+++ b/src/libfetchers/include/nix/fetchers/git-utils.hh
@@ -40,6 +40,11 @@ struct GitRepo
         bool create = false;
         bool bare = false;
         bool packfilesOnly = false;
+        /**
+         * Open only the object database, bypassing full repository validation.
+         * Useful for repos with unsupported extensions (e.g., reftables).
+         */
+        bool odbOnly = false;
     };
 
     static ref openRepo(const std::filesystem::path & path, Options options);
@@ -104,6 +109,28 @@ struct GitRepo
 
     virtual bool hasObject(const Hash & oid) = 0;
 
+    /**
+     * Get the object ID of the entry named `entryName` directly inside the
+     * tree `treeSha`.
+     *
+     * @throws Error if the tree cannot be looked up or has no such entry.
+     */
+    virtual Hash getSubtreeSha(const Hash & treeSha, const std::string & entryName) = 0;
+
+    /** Get the root tree SHA from a commit SHA */
+    virtual Hash getCommitTree(const Hash & commitSha) = 0;
+
+    /**
+     * Get the set of dirty files by comparing a commit's tree against a workdir.
+     * Untracked files are included. Works with ODB-only repos (no refs needed).
+     *
+     * As a side effect, `workdirPath` becomes the working directory of this
+     * repository object.
+     *
+     * @throws Error if the workdir cannot be set or the diff cannot be created.
+     */
+    virtual std::set<CanonPath> getDirtyFilesAgainstTree(const Hash & commitSha, const std::filesystem::path & workdirPath) = 0;
+
     virtual ref getAccessor(const Hash & rev, const GitAccessorOptions & options, std::string displayPrefix) = 0;