Add flamegraph profiling runner

Atticus Kuhn · Atticus Kuhn · commit 28ff7fae5690 · 2026-01-28T17:04:30.000Z
diff --git a/docs/activeContext.md b/docs/activeContext.md
@@ -47,6 +47,8 @@ Latest changes:
   - `PartIiProject/Rust.lean`: generated `for` loops iterate via `.clone().into_iter()` to avoid moving maps used more than once.
 - Refactored the Rust AST to be DeBruijn-indexed (`Expr : Nat → Type`, vars are `Fin ctx`) and replaced stringly-typed runtime calls with `RuntimeFn`; updated `PartIiProject/CodegenRust.lean` accordingly.
 - Added a performance benchmarking runner `Performance.lean` (flake app `performanceComparison`) that compares runtime (ms) of `sdql-rs` binaries vs Lean-generated Rust binaries, including microbenchmarks and TPCH cases.
+- Added a flamegraph profiling runner `Flamegraph.lean` (flake app `flamegraph`) that generates per-TPCH SVGs for Lean-generated Rust binaries using `cargo flamegraph`.
+- Flamegraph runner now rewrites `TPCH_DATASET_PATH` and TPCH tiny load paths to absolute paths so profiling works from the generated cargo workspace.
 - Refined the optimisation benchmarking runner `OptimisationPerformanceComparison.lean` (flake app `optimisationPerformanceComparison`) to use `[SDQLProg2 { T }| ... ]` programs, drop CLI arg parsing, and increase default input sizes so optimisation effects are clearer.
 - Fixed a dependent-pattern-matching blocker in optimisation passes by refactoring `Term2.mul`/`Term2.proj` to carry typeclass witnesses (`has_tensor`/`has_proj`) instead of computed indices (`tensor` / `List.getD`) directly.
 - Added a small `Term2` optimisation framework (`PartIiProject/Optimisations/Apply.lean`) where each rewrite is a non-recursive `Optimisation` and `applyOptimisations{,Loc}` performs the recursive traversal + (fuel-bounded) fixpoint iteration.
diff --git a/docs/progress.md b/docs/progress.md
@@ -45,6 +45,8 @@ What works:
 - CI: GitHub Actions workflow builds the project and runs the test executable on pushes/PRs.
 - `nix run` support: wrapper script ensures datasets are present and runs tests with proper environment setup; reference binaries are built on-demand by the Lean test runner when missing.
 - Performance comparison: `Performance.lean` executable `performanceComparison` benchmarks runtime (ms) of `sdql-rs` reference binaries vs Lean-generated Rust binaries.
+- Flamegraph profiling: `Flamegraph.lean` executable `flamegraph` generates per-TPCH SVG flamegraphs for Lean-generated Rust binaries.
+- Flamegraph runner normalizes `TPCH_DATASET_PATH` and TPCH tiny load paths to absolute paths so dataset loading works from the profiling cargo workspace.
 - Optimisation performance comparison: `OptimisationPerformanceComparison.lean` executable `optimisationPerformanceComparison` benchmarks runtime (ms) of unoptimised vs optimised Lean-generated Rust binaries for the implemented SDQL optimisations.
 - Surface/core terms are DeBruijn-indexed: surface terms in `SurfaceCore2.lean`, core terms in `Term2.lean`, with lowering in `ToCore2`.
 - Optimisation-friendly `Term2` indices: `mul`/`proj` carry `has_tensor`/`has_proj` witnesses to avoid dependent-elimination failures when pattern-matching in optimisation passes.
diff --git a/docs/techContext.md b/docs/techContext.md
@@ -54,6 +54,7 @@ How to run:
 - Run tests: `lake exe sdql-tests`.
 - Preferred: `nix run` (runs the full test suite; sdql-rs TPCH reference binaries are built on-demand by the Lean test runner via `cargo build --release --bin ...` when missing).
 - Performance comparison: `nix run .#performanceComparison` (times `sdql-rs` binaries vs Lean-generated Rust binaries; must be run from the project root).
+- Flamegraph profiling: `nix run .#flamegraph` (generates per-TPCH SVGs for Lean-generated Rust binaries; must be run from the project root).
 - Optimisation performance comparison: `nix run .#optimisationPerformanceComparison` (times unoptimised vs optimised Lean-generated Rust binaries for a small suite of SDQL optimisation patterns).
 - Explore: open the `.lean` files and evaluate examples with `#eval`.
 - Try the DSL: use `[SDQLProg2 { int }| 3 + 5 ]` (runs the full pipeline) or start from `[SDQL| 3 + 5 ]` and call `loadTermToSProg2` explicitly.
diff --git a/flake.nix b/flake.nix
@@ -59,6 +59,11 @@
             src = ./.;
           };
 
+          sdqlFlamegraph = lake.mkPackage { # Lake package named "flamegraph", built from the repository root
+            name = "flamegraph";
+            src = ./.;
+          };
+
           # Wrapper script that sets up datasets for tests and runs the Lean test runner.
           # TPCH reference binaries are built on-demand by `Tests/Main.lean`.
           sdqlTestsWithRef = pkgs.writeShellApplication {
@@ -123,6 +128,33 @@
               exec ${sdqlOptPerf}/bin/optimisationPerformanceComparison "$@"
             '';
           };
+
+          flamegraphRunner = pkgs.writeShellApplication { # Wrapper: checks the working directory and datasets, then execs the Lean `flamegraph` binary
+            name = "flamegraph";
+            runtimeInputs = # tools supplied to the wrapper script
+              [ rustToolchain pkgs.cargo-flamegraph ]
+              ++ pkgs.lib.optionals pkgs.stdenv.isLinux [ pkgs.linuxPackages.perf ]; # perf is added on Linux only
+            text = ''
+              set -euo pipefail
+
+              if [ ! -f "sdql_runtime.rs" ]; then
+                echo "Error: must be run from the project root directory (sdql_runtime.rs not found)" >&2
+                exit 1
+              fi
+
+              if [ ! -d "datasets/tpch-tiny" ]; then
+                echo "Error: datasets/tpch-tiny not found" >&2
+                exit 1
+              fi
+
+              if [ ! -d "sdql-rs/datasets/tpch_datasets/SF_0.01" ]; then
+                echo "Error: sdql-rs/datasets/tpch_datasets/SF_0.01 not found" >&2
+                exit 1
+              fi
+
+              exec ${sdqlFlamegraph}/bin/flamegraph "$@"
+            '';
+          };
           # Runtime tools shared by sdql reference test runners
           sdqlRefRuntimeInputs = with pkgs; [
             # JVM + Scala toolchain
@@ -218,6 +250,7 @@
             sdql-tests-bare = sdqlTests;
             performanceComparison = performanceComparison;
             optimisationPerformanceComparison = optimisationPerformanceComparison;
+            flamegraph = flamegraphRunner;
             sdql-reference-tests = sdqlRefTestRunner;
             sdql-reference-tpch-0_01 = sdqlRefTPCH001;
             sdql-reference-tpch-1 = sdqlRefTPCH1;
@@ -257,6 +290,10 @@
               type = "app";
               program = "${optimisationPerformanceComparison}/bin/optimisationPerformanceComparison";
             };
+            flamegraph = { # app entry for `nix run .#flamegraph`; points at the wrapper script, not the bare Lean binary
+              type = "app";
+              program = "${flamegraphRunner}/bin/flamegraph";
+            };
           };
 
           devShells.default = pkgs.mkShell {
diff --git a/lakefile.toml b/lakefile.toml
@@ -28,3 +28,7 @@ root = "Performance"
 [[lean_exe]]
 name = "optimisationPerformanceComparison"
 root = "OptimisationPerformanceComparison"
+
+[[lean_exe]] # flamegraph profiling runner executable
+name = "flamegraph"
+root = "Flamegraph"
diff --git a/todos.md b/todos.md
@@ -14,6 +14,8 @@
 - start writing paper (in typst or latex), and add to CI/CD
 - optimisation pipeline is ugly, could this be improved by the use of a monad?
 - Current rust implementation is slowed down a lot by overused of `x.clone()` in BTreeMap. I'm not good enough with Rust to avoid this.
+- Use a Rust profiler (e.g. `cargo flamegraph`) for any serious performance analysis.
+
 ```bash
 [atticusk@nixos:~/coding/part_ii_project]$ find PartIiProject -name "*.lean" -exec wc -l {} + | sort -nr | head -n10
   3987 total