RayforceDB
diff --git a/Makefile b/Makefile
Lines changed: 2 additions & 2 deletions
diff --git a/benchmark/benchmarks.py b/benchmark/benchmarks.py
Lines changed: 118 additions & 62 deletions
@@ -82,8 +82,8 @@ test-cov:
 	python3 -m pytest -x -vv --cov=rayforce --cov-report=term-missing tests/
 
 lint:
-	python3 -m ruff format tests/ rayforce/
-	python3 -m ruff check rayforce/ --fix
+	python3 -m ruff format tests/ rayforce/ benchmark/
+	python3 -m ruff check rayforce/ benchmark/ --fix
 	python3 -m ruff check tests/ --fix --select I
 	python3 -m mypy rayforce/
 	clang-format -i rayforce/capi/*
 
@@ -1,6 +1,7 @@
-from rayforce import eval_str, Column
-from timer import time_microseconds
 import polars as pl
+from timer import time_microseconds
+
+from rayforce import Column, eval_str
 
 
 class BenchmarkError(Exception): ...
@@ -40,6 +41,17 @@ def run():
 
         return time_microseconds(run)
 
+    @staticmethod
+    def benchmark_q1_duckdb(conn):
+        """
+        Q1: Group by id1, sum v1
+        """
+
+        def run():
+            return conn.execute("SELECT id1, SUM(v1) as v1_sum FROM df GROUP BY id1").fetchdf()
+
+        return time_microseconds(run)
+
     @staticmethod
     def benchmark_q1_native_rayforce(table_name):
         """
@@ -51,20 +63,17 @@ def benchmark_q1_native_rayforce(table_name):
 
         if isinstance(result, dict) and "time" in result:
             return result["time"] * 1000, result
-        elif isinstance(result, (int, float)):
+        if isinstance(result, (int, float)):
             return result * 1000, result
-        elif hasattr(result, "to_python"):
+        if hasattr(result, "to_python"):
             # Handle Rayforce scalar types (F64, I64, etc.)
             value = result.to_python()
             return value * 1000, result
-        elif hasattr(result, "value"):
+        if hasattr(result, "value"):
             # Handle Rayforce scalar types with value property
             value = result.value
             return value * 1000, result
-        else:
-            raise BenchmarkError(
-                f"rayforce runtime returned unsupported measure: {type(result)}"
-            )
+        raise BenchmarkError(f"rayforce runtime returned unsupported measure: {type(result)}")
 
 
 class Q2:
@@ -101,6 +110,19 @@ def run():
 
         return time_microseconds(run)
 
+    @staticmethod
+    def benchmark_q2_duckdb(conn):
+        """
+        Q2: Group by id1, id2, sum v1
+        """
+
+        def run():
+            return conn.execute(
+                "SELECT id1, id2, SUM(v1) as v1_sum FROM df GROUP BY id1, id2"
+            ).fetchdf()
+
+        return time_microseconds(run)
+
     @staticmethod
     def benchmark_q2_native_rayforce(table_name):
         """
@@ -112,18 +134,15 @@ def benchmark_q2_native_rayforce(table_name):
 
         if isinstance(result, dict) and "time" in result:
             return result["time"] * 1000, result
-        elif isinstance(result, (int, float)):
+        if isinstance(result, (int, float)):
             return result * 1000, result
-        elif hasattr(result, "to_python"):
+        if hasattr(result, "to_python"):
             value = result.to_python()
             return value * 1000, result
-        elif hasattr(result, "value"):
+        if hasattr(result, "value"):
             value = result.value
             return value * 1000, result
-        else:
-            raise BenchmarkError(
-                f"rayforce runtime returned unsupported measure: {type(result)}"
-            )
+        raise BenchmarkError(f"rayforce runtime returned unsupported measure: {type(result)}")
 
 
 class Q3:
@@ -161,12 +180,24 @@ def benchmark_q3_polars(df):
 
         def run():
             return df.group_by("id3").agg(
-                pl.col("v1").sum().alias("v1_sum"),
-                pl.col("v3").mean().alias("v3_avg")
+                pl.col("v1").sum().alias("v1_sum"), pl.col("v3").mean().alias("v3_avg")
             )
 
         return time_microseconds(run)
 
+    @staticmethod
+    def benchmark_q3_duckdb(conn):
+        """
+        Q3: Group by id3, sum v1, avg v3
+        """
+
+        def run():
+            return conn.execute(
+                "SELECT id3, SUM(v1) as v1_sum, AVG(v3) as v3_avg FROM df GROUP BY id3"
+            ).fetchdf()
+
+        return time_microseconds(run)
+
     @staticmethod
     def benchmark_q3_native_rayforce(table_name):
         """
@@ -178,18 +209,15 @@ def benchmark_q3_native_rayforce(table_name):
 
         if isinstance(result, dict) and "time" in result:
             return result["time"] * 1000, result
-        elif isinstance(result, (int, float)):
+        if isinstance(result, (int, float)):
             return result * 1000, result
-        elif hasattr(result, "to_python"):
+        if hasattr(result, "to_python"):
             value = result.to_python()
             return value * 1000, result
-        elif hasattr(result, "value"):
+        if hasattr(result, "value"):
             value = result.value
             return value * 1000, result
-        else:
-            raise BenchmarkError(
-                f"rayforce runtime returned unsupported measure: {type(result)}"
-            )
+        raise BenchmarkError(f"rayforce runtime returned unsupported measure: {type(result)}")
 
 
 class Q4:
@@ -219,11 +247,7 @@ def benchmark_q4_pandas(df):
         """
 
         def run():
-            return (
-                df.groupby("id3")
-                .agg({"v1": "mean", "v2": "mean", "v3": "mean"})
-                .reset_index()
-            )
+            return df.groupby("id3").agg({"v1": "mean", "v2": "mean", "v3": "mean"}).reset_index()
 
         return time_microseconds(run)
 
@@ -237,11 +261,24 @@ def run():
             return df.group_by("id3").agg(
                 pl.col("v1").mean().alias("v1_avg"),
                 pl.col("v2").mean().alias("v2_avg"),
-                pl.col("v3").mean().alias("v3_avg")
+                pl.col("v3").mean().alias("v3_avg"),
             )
 
         return time_microseconds(run)
 
+    @staticmethod
+    def benchmark_q4_duckdb(conn):
+        """
+        Q4: Group by id3, avg v1, avg v2, avg v3
+        """
+
+        def run():
+            return conn.execute(
+                "SELECT id3, AVG(v1) as v1_avg, AVG(v2) as v2_avg, AVG(v3) as v3_avg FROM df GROUP BY id3"
+            ).fetchdf()
+
+        return time_microseconds(run)
+
     @staticmethod
     def benchmark_q4_native_rayforce(table_name):
         """
@@ -253,18 +290,15 @@ def benchmark_q4_native_rayforce(table_name):
 
         if isinstance(result, dict) and "time" in result:
             return result["time"] * 1000, result
-        elif isinstance(result, (int, float)):
+        if isinstance(result, (int, float)):
             return result * 1000, result
-        elif hasattr(result, "to_python"):
+        if hasattr(result, "to_python"):
             value = result.to_python()
             return value * 1000, result
-        elif hasattr(result, "value"):
+        if hasattr(result, "value"):
             value = result.value
             return value * 1000, result
-        else:
-            raise BenchmarkError(
-                f"rayforce runtime returned unsupported measure: {type(result)}"
-            )
+        raise BenchmarkError(f"rayforce runtime returned unsupported measure: {type(result)}")
 
 
 class Q5:
@@ -294,11 +328,7 @@ def benchmark_q5_pandas(df):
         """
 
         def run():
-            return (
-                df.groupby("id3")
-                .agg({"v1": "sum", "v2": "sum", "v3": "sum"})
-                .reset_index()
-            )
+            return df.groupby("id3").agg({"v1": "sum", "v2": "sum", "v3": "sum"}).reset_index()
 
         return time_microseconds(run)
 
@@ -312,11 +342,24 @@ def run():
             return df.group_by("id3").agg(
                 pl.col("v1").sum().alias("v1_sum"),
                 pl.col("v2").sum().alias("v2_sum"),
-                pl.col("v3").sum().alias("v3_sum")
+                pl.col("v3").sum().alias("v3_sum"),
             )
 
         return time_microseconds(run)
 
+    @staticmethod
+    def benchmark_q5_duckdb(conn):
+        """
+        Q5: Group by id3, sum v1, sum v2, sum v3
+        """
+
+        def run():
+            return conn.execute(
+                "SELECT id3, SUM(v1) as v1_sum, SUM(v2) as v2_sum, SUM(v3) as v3_sum FROM df GROUP BY id3"
+            ).fetchdf()
+
+        return time_microseconds(run)
+
     @staticmethod
     def benchmark_q5_native_rayforce(table_name):
         """
@@ -328,18 +371,15 @@ def benchmark_q5_native_rayforce(table_name):
 
         if isinstance(result, dict) and "time" in result:
             return result["time"] * 1000, result
-        elif isinstance(result, (int, float)):
+        if isinstance(result, (int, float)):
             return result * 1000, result
-        elif hasattr(result, "to_python"):
+        if hasattr(result, "to_python"):
             value = result.to_python()
             return value * 1000, result
-        elif hasattr(result, "value"):
+        if hasattr(result, "value"):
             value = result.value
             return value * 1000, result
-        else:
-            raise BenchmarkError(
-                f"rayforce runtime returned unsupported measure: {type(result)}"
-            )
+        raise BenchmarkError(f"rayforce runtime returned unsupported measure: {type(result)}")
 
 
 class Q6:
@@ -351,9 +391,7 @@ def benchmark_q6_rayforce(table):
 
         def run():
             return (
-                table.select(
-                    range_v1_v2=(Column("v1").max() - Column("v2").min())
-                )
+                table.select(range_v1_v2=(Column("v1").max() - Column("v2").min()))
                 .by("id3")
                 .execute()
             )
@@ -386,29 +424,41 @@ def run():
 
         return time_microseconds(run)
 
+    @staticmethod
+    def benchmark_q6_duckdb(conn):
+        """
+        Q6: Group by id3, max(v1) - min(v2)
+        """
+
+        def run():
+            return conn.execute(
+                "SELECT id3, MAX(v1) - MIN(v2) as range_v1_v2 FROM df GROUP BY id3"
+            ).fetchdf()
+
+        return time_microseconds(run)
+
     @staticmethod
     def benchmark_q6_native_rayforce(table_name):
         """
         Q6: Group by id3, max(v1) - min(v2)
         """
 
-        query = f"(timeit (select {{range_v1_v2: (- (max v1) (min v2)) by: id3 from: {table_name}}}))"
+        query = (
+            f"(timeit (select {{range_v1_v2: (- (max v1) (min v2)) by: id3 from: {table_name}}}))"
+        )
         result = eval_str(query)
 
         if isinstance(result, dict) and "time" in result:
             return result["time"] * 1000, result
-        elif isinstance(result, (int, float)):
+        if isinstance(result, (int, float)):
             return result * 1000, result
-        elif hasattr(result, "to_python"):
+        if hasattr(result, "to_python"):
             value = result.to_python()
             return value * 1000, result
-        elif hasattr(result, "value"):
+        if hasattr(result, "value"):
             value = result.value
             return value * 1000, result
-        else:
-            raise BenchmarkError(
-                f"rayforce runtime returned unsupported measure: {type(result)}"
-            )
+        raise BenchmarkError(f"rayforce runtime returned unsupported measure: {type(result)}")
 
 
 benchmarks = [
@@ -417,41 +467,47 @@ def benchmark_q6_native_rayforce(table_name):
         Q1.benchmark_q1_rayforce,
         Q1.benchmark_q1_pandas,
         Q1.benchmark_q1_polars,
+        Q1.benchmark_q1_duckdb,
         Q1.benchmark_q1_native_rayforce,
     ),
     (
         "Q2: Group by id1, id2, sum v1",
         Q2.benchmark_q2_rayforce,
         Q2.benchmark_q2_pandas,
         Q2.benchmark_q2_polars,
+        Q2.benchmark_q2_duckdb,
         Q2.benchmark_q2_native_rayforce,
     ),
     (
         "Q3: Group by id3, sum v1, avg v3",
         Q3.benchmark_q3_rayforce,
         Q3.benchmark_q3_pandas,
         Q3.benchmark_q3_polars,
+        Q3.benchmark_q3_duckdb,
         Q3.benchmark_q3_native_rayforce,
     ),
     (
         "Q4: Group by id3, avg v1, v2, v3",
         Q4.benchmark_q4_rayforce,
         Q4.benchmark_q4_pandas,
         Q4.benchmark_q4_polars,
+        Q4.benchmark_q4_duckdb,
         Q4.benchmark_q4_native_rayforce,
     ),
     (
         "Q5: Group by id3, sum v1, v2, v3",
         Q5.benchmark_q5_rayforce,
         Q5.benchmark_q5_pandas,
         Q5.benchmark_q5_polars,
+        Q5.benchmark_q5_duckdb,
         Q5.benchmark_q5_native_rayforce,
     ),
     (
         "Q6: Group by id3, max(v1) - min(v2)",
         Q6.benchmark_q6_rayforce,
         Q6.benchmark_q6_pandas,
         Q6.benchmark_q6_polars,
+        Q6.benchmark_q6_duckdb,
         Q6.benchmark_q6_native_rayforce,
     ),
 ]