add optimisations

Atticus Kuhn · Atticus Kuhn · commit fecfc24d4971 · 2026-01-10T14:24:28.000-08:00
diff --git a/PartIiProject/Optimisations/Term2Utils.lean b/PartIiProject/Optimisations/Term2Utils.lean
@@ -78,6 +78,26 @@ def liftSubst2 {ctx ctx' : List Ty} {a b : Ty} (σ : Subst ctx ctx') :
     Subst (a :: b :: ctx) (a :: b :: ctx') :=
   liftSubst (a := a) (liftSubst (a := b) σ)
 
+mutual
+  /-- A "dummy" term of a given type, intended only for unreachable substitution branches; it picks one fixed placeholder per `Ty` constructor and works in any `ctx`. -/
+  def defaultTerm2 {ctx : List Ty} : {ty : Ty} → Term2 ctx ty
+    | .bool => .constBool false
+    | .int => .constInt 0
+    | .real => .constReal 0.0
+    | .maxProduct => .promote (.mk SourceLocation.unknown (.constReal 0.0))  -- built by promoting the real constant 0.0
+    | .date => .builtin (.DateLit 0) (.mk SourceLocation.unknown (.constRecord .nil))  -- `DateLit 0` builtin applied to an empty record
+    | .string => .constString ""
+    | .record l => .constRecord (defaultFields2 (ctx := ctx) l)  -- one placeholder per field type in `l`
+    | .dict _ _ => .emptyDict
+
+  def defaultLoc2 {ctx : List Ty} : {ty : Ty} → TermLoc2 ctx ty  -- `defaultTerm2` wrapped with `SourceLocation.unknown`
+    | ty => .mk SourceLocation.unknown (defaultTerm2 (ctx := ctx) (ty := ty))
+
+  def defaultFields2 {ctx : List Ty} : (l : List Ty) → TermFields2 ctx l  -- placeholder field list: one `defaultLoc2` per entry of `l`
+    | [] => .nil
+    | t :: ts => .cons (defaultLoc2 (ctx := ctx) (ty := t)) (defaultFields2 (ctx := ctx) ts)
+end
+
 mutual
   def substTerm2 {ctx ctx' : List Ty} {ty : Ty}
       (σ : Subst ctx ctx') : Term2 ctx ty → Term2 ctx' ty
@@ -126,11 +146,11 @@ mutual
     | .lookup _ d k => mentionsIndexLoc d i || mentionsIndexLoc k i
     | .not e => mentionsIndexLoc e i
     | .ite c t f => mentionsIndexLoc c i || mentionsIndexLoc t i || mentionsIndexLoc f i
-    | .letin bound body => mentionsIndexLoc bound i || mentionsIndexLoc body i
+    | .letin bound body => mentionsIndexLoc bound i || mentionsIndexLoc body (i + 1)
     | .add _ t1 t2 => mentionsIndexLoc t1 i || mentionsIndexLoc t2 i
     | @Term2.mul _ _ _ _ _ _ _ _ t1 t2 => mentionsIndexLoc t1 i || mentionsIndexLoc t2 i
     | .promote e => mentionsIndexLoc e i
-    | .sum _ d body => mentionsIndexLoc d i || mentionsIndexLoc body i
+    | .sum _ d body => mentionsIndexLoc d i || mentionsIndexLoc body (i + 2)
     | @Term2.proj _ _ _ record _ _ => mentionsIndexLoc record i
     | .builtin _ arg => mentionsIndexLoc arg i
 
diff --git a/PartIiProject/Optimisations/VerticalLoopFusion.lean b/PartIiProject/Optimisations/VerticalLoopFusion.lean
@@ -6,50 +6,6 @@ namespace PartIiProject.Optimisations
 
 open PartIiProject.Optimisations.Term2
 
-private def varMem? {ctx : List Ty} {ty : Ty} (tm : Term2 ctx ty) : Option (Mem ty ctx) :=
-  match tm with
-  | .var m => some m
-  | .constInt _ => none
-  | .constReal _ => none
-  | .constBool _ => none
-  | .constString _ => none
-  | .constRecord _ => none
-  | .emptyDict => none
-  | .dictInsert _ _ _ => none
-  | .lookup _ _ _ => none
-  | .not _ => none
-  | .ite _ _ _ => none
-  | .letin _ _ => none
-  | .add _ _ _ => none
-  | @Term2.mul _ _ _ _ _ _ _ _ _ _ => none
-  | .promote _ => none
-  | .sum _ _ _ => none
-  | @Term2.proj _ _ _ _ _ _ => none
-  | .builtin _ _ => none
-
-private def singletonDictInsert?
-    {ctx : List Ty} {ty : Ty} (tm : Term2 ctx ty) :
-    Option (Σ dom : Ty, Σ range : Ty, TermLoc2 ctx dom × TermLoc2 ctx range) :=
-  match tm with
-  | @Term2.dictInsert _ dom range k v _d => some ⟨dom, ⟨range, (k, v)⟩⟩
-  | .var _ => none
-  | .constInt _ => none
-  | .constReal _ => none
-  | .constBool _ => none
-  | .constString _ => none
-  | .constRecord _ => none
-  | .emptyDict => none
-  | .lookup _ _ _ => none
-  | .not _ => none
-  | .ite _ _ _ => none
-  | .letin _ _ => none
-  | .add _ _ _ => none
-  | @Term2.mul _ _ _ _ _ _ _ _ _ _ => none
-  | .promote _ => none
-  | .sum _ _ _ => none
-  | @Term2.proj _ _ _ _ _ _ => none
-  | .builtin _ _ => none
-
 /--
 Vertical loop fusion, specialized to the two common "singleton dict" shapes:
 
@@ -61,10 +17,71 @@ Vertical loop fusion, specialized to the two common "singleton dict" shapes:
    `let y = sum(<x,x_v> in e1) { x -> f1(x_v) } in sum(<x,x_v> in y) { x -> f2(x_v) }`
    `↦ sum(<x,x_v> in e1) { x -> f2(f1(x_v)) }`
 -/
-def verticalLoopFusion2 : Optimisation
-  := fun {ctx} {ty} t =>
-      match t with
-    | t@Term2.letin (⟨_,  .sum a dict ⟨ _, .dictInsert x y z⟩  ⟩ ) let_in_body => .some t
-    | _ => .none
+def verticalLoopFusionKeyMap2 : Optimisation :=  -- key-map case: fuses `let y = sum(e₁) {k₁ -> v₁} in sum(y) {k₂ -> v₂}` when both values are the index-1 variable
+  fun {ctx} {ty} t =>
+    match t with
+    | Term2.letin
+        (.mk _ (Term2.sum _ e₁ (.mk _ (.dictInsert k₁ v₁ (.mk _ .emptyDict)))))  -- bound term: a sum over `e₁` whose body is one insert into `emptyDict`
+        (.mk _ (Term2.sum a₂
+          (.mk _ (.var (.head _)))  -- the second sum ranges over the let-bound variable itself
+          (.mk bodyLoc (.dictInsert k₂ v₂ (.mk emptyLoc .emptyDict))))) =>
+        match v₁.term, v₂.term with
+        | .var (.tail _ (.head _)), .var (.tail _ (.head _)) =>  -- both inserted values must be exactly the variable at index 1
+            if Term2.mentionsIndexLoc k₁ 1 || Term2.mentionsIndexLoc k₂ 1 || Term2.mentionsIndexLoc k₂ 2 then  -- give up if either key mentions index 1, or `k₂` mentions index 2 (the let-bound dict)
+              none
+            else
+              let σ : Term2.Subst (_ :: _ :: (.dict _ _) :: ctx) (_ :: _ :: ctx) :=  -- maps the second body's context (two sum binders + let binder) into the first body's context
+                fun {ty} m =>
+                  match m with
+                  | .head _ => k₁.term  -- index 0 ↦ the first loop's key expression
+                  | .tail _ m =>
+                      match m with
+                      | .head _ => .var (.tail _ (.head ctx))  -- index 1 stays index 1
+                      | .tail _ m =>
+                          match m with
+                          | .head _ => Term2.defaultTerm2  -- index 2 (let-bound dict): placeholder; the guard above excludes it from `k₂`, and `v₂` is the index-1 variable
+                          | .tail _ m => .var (.tail _ (.tail _ m))  -- outer variables: the let binder disappears, so they shift down by one
+              let k₂' := Term2.substLoc2 σ k₂
+              let v₂' := Term2.substLoc2 σ v₂
+              let emptyFused : TermLoc2 (_ :: _ :: ctx) (.dict _ _) := .mk emptyLoc .emptyDict  -- rebuilt `emptyDict` in the fused context, keeping the second loop's location
+              let fusedBody : TermLoc2 (_ :: _ :: ctx) (.dict _ _) :=
+                .mk bodyLoc (.dictInsert k₂' v₂' emptyFused)
+              some (Term2.sum a₂ e₁ fusedBody)  -- a single sum over `e₁`, reusing the second sum's first argument `a₂`
+        | _, _ => none
+    | _ => none
+
+def verticalLoopFusionValueMap2 : Optimisation :=  -- value-map case: fuses `let y = sum(e₁) {k₁ -> v₁} in sum(y) {k₂ -> v₂}` when both keys are the index-0 variable
+  fun {ctx} {ty} t =>
+    match t with
+    | Term2.letin
+        (.mk _ (Term2.sum _ e₁ (.mk _ (.dictInsert k₁ v₁ (.mk _ .emptyDict)))))  -- bound term: a sum over `e₁` whose body is one insert into `emptyDict`
+        (.mk _ (Term2.sum a₂
+          (.mk _ (.var (.head _)))  -- the second sum ranges over the let-bound variable itself
+          (.mk bodyLoc (.dictInsert k₂ v₂ (.mk emptyLoc .emptyDict))))) =>
+        match k₁.term, k₂.term with
+        | .var (.head _), .var (.head _) =>  -- both inserted keys must be exactly the variable at index 0
+            if Term2.mentionsIndexLoc v₁ 0 || Term2.mentionsIndexLoc v₂ 0 || Term2.mentionsIndexLoc v₂ 2 then  -- give up if either value mentions index 0, or `v₂` mentions index 2 (the let-bound dict)
+              none
+            else
+              let σ : Term2.Subst (_ :: _ :: (.dict _ _) :: ctx) (_ :: _ :: ctx) :=  -- maps the second body's context (two sum binders + let binder) into the first body's context
+                fun {ty} m =>
+                  match m with
+                  | .head _ => k₁.term  -- index 0 ↦ `k₁`, which the match above fixed to the index-0 variable
+                  | .tail _ m =>
+                      match m with
+                      | .head _ => v₁.term  -- index 1 ↦ the first loop's value expression
+                      | .tail _ m =>
+                          match m with
+                          | .head _ => Term2.defaultTerm2  -- index 2 (let-bound dict): placeholder; the guard above excludes it from `v₂`, and `k₂` is the index-0 variable
+                          | .tail _ m => .var (.tail _ (.tail _ m))  -- outer variables: the let binder disappears, so they shift down by one
+              let k₂' := Term2.substLoc2 σ k₂
+              let v₂' := Term2.substLoc2 σ v₂
+              let emptyFused : TermLoc2 (_ :: _ :: ctx) (.dict _ _) := .mk emptyLoc .emptyDict  -- rebuilt `emptyDict` in the fused context, keeping the second loop's location
+              let fusedBody : TermLoc2 (_ :: _ :: ctx) (.dict _ _) :=
+                .mk bodyLoc (.dictInsert k₂' v₂' emptyFused)
+              some (Term2.sum a₂ e₁ fusedBody)  -- NOTE(review): `v₂` now runs on every entry of `e₁`; if zero-valued entries of `y` are dropped, results could differ — confirm
+        | _, _ => none
+    | _ => none
+
 
 end PartIiProject.Optimisations
diff --git a/Tests/GuardMsgs.lean b/Tests/GuardMsgs.lean
@@ -1,4 +1,5 @@
 import PartIiProject.SyntaxSDQLProg
+import Tests.Optimisations.VerticalLoopFusion
 
 open PartIiProject
 
diff --git a/Tests/Optimisations/VerticalLoopFusion.lean b/Tests/Optimisations/VerticalLoopFusion.lean
@@ -0,0 +1,32 @@
+import PartIiProject.Optimisations
+import PartIiProject.SyntaxSDQLProg
+
+open PartIiProject
+open PartIiProject.Optimisations
+
+namespace Tests.Optimisations.VerticalLoopFusion
+
+open ToCore2 in
+unsafe def optimiseCoreTerm (p : SProg2) : String :=  -- test helper: translate `p` with `trProg2`, apply both fusion rewrites, and render the resulting term
+  let core := trProg2 p
+  let term' := applyOptimisationsLoc [verticalLoopFusionKeyMap2, verticalLoopFusionValueMap2] core.term  -- rewrites are listed key-map first, value-map second
+  Term2.showTermLoc2 [] term'  -- rendered with `[]` as first argument (presumably names for free variables — confirm)
+
+/-- info: "sum(x, y in {1 -> 10} ++ {} + {2 -> 20} ++ {}) {x + 1 + 2 -> y} ++ {}" -/
+#guard_msgs in
+#eval optimiseCoreTerm
+  ([SDQLProg2 { { int -> int } }|
+    let y = sum( <x, x_v> <- ({ 1 -> 10 } + { 2 -> 20 }) ) { x + 1 -> x_v } in
+    sum( <x, x_v> <- y ) { x + 2 -> x_v }
+  ] : SProg2)
+
+/-- info: "sum(x, y in {1 -> 10} ++ {} + {2 -> 20} ++ {}) {x -> y + 1 + 2} ++ {}" -/
+#guard_msgs in
+#eval optimiseCoreTerm
+  ([SDQLProg2 { { int -> int } }|
+    let y = sum( <x, x_v> <- ({ 1 -> 10 } + { 2 -> 20 }) ) { x -> x_v + 1 } in
+    sum( <x, x_v> <- y ) { x -> x_v + 2 }
+  ] : SProg2)
+
+end Tests.Optimisations.VerticalLoopFusion
+
diff --git a/docs/activeContext.md b/docs/activeContext.md
@@ -46,6 +46,9 @@ Latest changes:
 - Refactored the Rust AST to be DeBruijn-indexed (`Expr : Nat → Type`, vars are `Fin ctx`) and replaced stringly-typed runtime calls with `RuntimeFn`; updated `PartIiProject/CodegenRust.lean` accordingly.
 - Added a performance benchmarking runner `Performance.lean` (flake app `performanceComparsion`) that compares runtime (ms) of `sdql-rs` binaries vs Lean-generated Rust binaries, including microbenchmarks and TPCH cases.
 - Fixed a dependent-pattern-matching blocker in optimisation passes by refactoring `Term2.mul`/`Term2.proj` to carry typeclass witnesses (`has_tensor`/`has_proj`) instead of computed indices (`tensor` / `List.getD`) directly.
+- Added a small `Term2` optimisation framework (`PartIiProject/Optimisations/Apply.lean`) where each rewrite is a non-recursive `Optimisation` and `applyOptimisations{,Loc}` performs the recursive traversal + (fuel-bounded) fixpoint iteration.
+- Implemented vertical loop fusion over `Term2` as two separate rewrites in `PartIiProject/Optimisations/VerticalLoopFusion.lean` (`verticalLoopFusionKeyMap2` and `verticalLoopFusionValueMap2`).
+- Added/confirmed `#guard_msgs` coverage for vertical loop fusion in `Tests/Optimisations/VerticalLoopFusion.lean` (pulled in via `Tests/GuardMsgs.lean`).
 
 Next steps (proposed):
 
diff --git a/docs/progress.md b/docs/progress.md
@@ -45,6 +45,7 @@ What works:
 - Performance comparison: `Performance.lean` executable `performanceComparsion` benchmarks runtime (ms) of `sdql-rs` reference binaries vs Lean-generated Rust binaries.
 - Surface/core terms are DeBruijn-indexed: surface terms in `SurfaceCore2.lean`, core terms in `Term2.lean`, with lowering in `ToCore2`.
 - Optimisation-friendly `Term2` indices: `mul`/`proj` carry `has_tensor`/`has_proj` witnesses to avoid dependent-elimination failures when pattern-matching in optimisation passes.
+- Optimisations over `Term2`: `PartIiProject/Optimisations/Apply.lean` provides a recursive driver for non-recursive `Optimisation` rewrites; `PartIiProject/Optimisations/VerticalLoopFusion.lean` implements key-map and value-map vertical loop fusion with `#guard_msgs` regression tests.
 
 What's left to build:
 
diff --git a/docs/systemPatterns.md b/docs/systemPatterns.md
@@ -110,6 +110,7 @@ Notable patterns:
 - For optimisation passes that pattern-match on `Term2`, avoid computed indices in inductive families:
   - `Term2.mul` carries a `has_tensor t1 t2 t3` witness (typeclass) instead of returning `Term2 ctx (tensor t1 t2)` directly.
   - `Term2.proj` carries a `has_proj l i t` witness instead of returning `Term2 ctx (l.getD i Ty.int)` directly.
+- Optimisation passes are structured as local, non-recursive rewrites over core terms (`PartIiProject/Optimisations/Apply.lean`): each `Optimisation` is `Term2 ctx ty → Option (Term2 ctx ty)`, and `applyOptimisations{,Loc}` provides the recursive traversal and fuel-bounded fixpoint iteration.
 - Addition and scaling are encoded as explicit evidence, guiding typing and compilation.
 - Lookups and sums rely on the additive identity of the result to stay total and align with sparse semantics.
 - Tests compare Rust program output against expected strings or a reference binary. Rust programs use `SDQLShow::show(&result)`.

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`import PartIiProject.SyntaxSDQLProg`
	`2`	`+import Tests.Optimisations.VerticalLoopFusion`
`2`	`3`
`3`	`4`	`open PartIiProject`
`4`	`5`