1- from rayforce import eval_str , Column
2- from timer import time_microseconds
31import polars as pl
2+ from timer import time_microseconds
3+
4+ from rayforce import Column , eval_str
45
56
class BenchmarkError(Exception):
    """Raised when a benchmark cannot turn a runtime result into a timing.

    In this module it is raised by the native rayforce benchmarks when the
    value returned by the runtime is not one of the shapes they know how to
    read a time measurement from.
    """
@@ -40,6 +41,17 @@ def run():
4041
4142 return time_microseconds (run )
4243
@staticmethod
def benchmark_q1_duckdb(conn):
    """
    Q1: Group by id1, sum v1

    Runs the aggregation as SQL through ``conn`` and materialises the result
    with ``fetchdf()`` so the fetch is part of the timed work.

    Args:
        conn: Object exposing ``execute(sql)`` whose return value has a
            ``fetchdf()`` method (presumably a DuckDB connection on which
            the caller has made a relation named ``df`` visible -- confirm
            against the caller).

    Returns:
        Whatever ``time_microseconds`` returns for the timed query callable.
    """

    def run():
        return conn.execute("SELECT id1, SUM(v1) as v1_sum FROM df GROUP BY id1").fetchdf()

    return time_microseconds(run)
54+
4355 @staticmethod
4456 def benchmark_q1_native_rayforce (table_name ):
4557 """
@@ -51,20 +63,17 @@ def benchmark_q1_native_rayforce(table_name):
5163
5264 if isinstance (result , dict ) and "time" in result :
5365 return result ["time" ] * 1000 , result
54- elif isinstance (result , (int , float )):
66+ if isinstance (result , (int , float )):
5567 return result * 1000 , result
56- elif hasattr (result , "to_python" ):
68+ if hasattr (result , "to_python" ):
5769 # Handle Rayforce scalar types (F64, I64, etc.)
5870 value = result .to_python ()
5971 return value * 1000 , result
60- elif hasattr (result , "value" ):
72+ if hasattr (result , "value" ):
6173 # Handle Rayforce scalar types with value property
6274 value = result .value
6375 return value * 1000 , result
64- else :
65- raise BenchmarkError (
66- f"rayforce runtime returned unsupported measure: { type (result )} "
67- )
76+ raise BenchmarkError (f"rayforce runtime returned unsupported measure: { type (result )} " )
6877
6978
7079class Q2 :
@@ -101,6 +110,19 @@ def run():
101110
102111 return time_microseconds (run )
103112
@staticmethod
def benchmark_q2_duckdb(conn):
    """
    Q2: Group by id1, id2, sum v1

    Runs the aggregation as SQL through ``conn`` and materialises the result
    with ``fetchdf()`` so the fetch is part of the timed work.

    Args:
        conn: Object exposing ``execute(sql)`` whose return value has a
            ``fetchdf()`` method (presumably a DuckDB connection on which
            the caller has made a relation named ``df`` visible -- confirm
            against the caller).

    Returns:
        Whatever ``time_microseconds`` returns for the timed query callable.
    """

    def run():
        return conn.execute(
            "SELECT id1, id2, SUM(v1) as v1_sum FROM df GROUP BY id1, id2"
        ).fetchdf()

    return time_microseconds(run)
125+
104126 @staticmethod
105127 def benchmark_q2_native_rayforce (table_name ):
106128 """
@@ -112,18 +134,15 @@ def benchmark_q2_native_rayforce(table_name):
112134
113135 if isinstance (result , dict ) and "time" in result :
114136 return result ["time" ] * 1000 , result
115- elif isinstance (result , (int , float )):
137+ if isinstance (result , (int , float )):
116138 return result * 1000 , result
117- elif hasattr (result , "to_python" ):
139+ if hasattr (result , "to_python" ):
118140 value = result .to_python ()
119141 return value * 1000 , result
120- elif hasattr (result , "value" ):
142+ if hasattr (result , "value" ):
121143 value = result .value
122144 return value * 1000 , result
123- else :
124- raise BenchmarkError (
125- f"rayforce runtime returned unsupported measure: { type (result )} "
126- )
145+ raise BenchmarkError (f"rayforce runtime returned unsupported measure: { type (result )} " )
127146
128147
129148class Q3 :
@@ -161,12 +180,24 @@ def benchmark_q3_polars(df):
161180
162181 def run ():
163182 return df .group_by ("id3" ).agg (
164- pl .col ("v1" ).sum ().alias ("v1_sum" ),
165- pl .col ("v3" ).mean ().alias ("v3_avg" )
183+ pl .col ("v1" ).sum ().alias ("v1_sum" ), pl .col ("v3" ).mean ().alias ("v3_avg" )
166184 )
167185
168186 return time_microseconds (run )
169187
@staticmethod
def benchmark_q3_duckdb(conn):
    """
    Q3: Group by id3, sum v1, avg v3

    Runs the aggregation as SQL through ``conn`` and materialises the result
    with ``fetchdf()`` so the fetch is part of the timed work.

    Args:
        conn: Object exposing ``execute(sql)`` whose return value has a
            ``fetchdf()`` method (presumably a DuckDB connection on which
            the caller has made a relation named ``df`` visible -- confirm
            against the caller).

    Returns:
        Whatever ``time_microseconds`` returns for the timed query callable.
    """

    def run():
        return conn.execute(
            "SELECT id3, SUM(v1) as v1_sum, AVG(v3) as v3_avg FROM df GROUP BY id3"
        ).fetchdf()

    return time_microseconds(run)
200+
170201 @staticmethod
171202 def benchmark_q3_native_rayforce (table_name ):
172203 """
@@ -178,18 +209,15 @@ def benchmark_q3_native_rayforce(table_name):
178209
179210 if isinstance (result , dict ) and "time" in result :
180211 return result ["time" ] * 1000 , result
181- elif isinstance (result , (int , float )):
212+ if isinstance (result , (int , float )):
182213 return result * 1000 , result
183- elif hasattr (result , "to_python" ):
214+ if hasattr (result , "to_python" ):
184215 value = result .to_python ()
185216 return value * 1000 , result
186- elif hasattr (result , "value" ):
217+ if hasattr (result , "value" ):
187218 value = result .value
188219 return value * 1000 , result
189- else :
190- raise BenchmarkError (
191- f"rayforce runtime returned unsupported measure: { type (result )} "
192- )
220+ raise BenchmarkError (f"rayforce runtime returned unsupported measure: { type (result )} " )
193221
194222
195223class Q4 :
@@ -219,11 +247,7 @@ def benchmark_q4_pandas(df):
219247 """
220248
221249 def run ():
222- return (
223- df .groupby ("id3" )
224- .agg ({"v1" : "mean" , "v2" : "mean" , "v3" : "mean" })
225- .reset_index ()
226- )
250+ return df .groupby ("id3" ).agg ({"v1" : "mean" , "v2" : "mean" , "v3" : "mean" }).reset_index ()
227251
228252 return time_microseconds (run )
229253
@@ -237,11 +261,24 @@ def run():
237261 return df .group_by ("id3" ).agg (
238262 pl .col ("v1" ).mean ().alias ("v1_avg" ),
239263 pl .col ("v2" ).mean ().alias ("v2_avg" ),
240- pl .col ("v3" ).mean ().alias ("v3_avg" )
264+ pl .col ("v3" ).mean ().alias ("v3_avg" ),
241265 )
242266
243267 return time_microseconds (run )
244268
@staticmethod
def benchmark_q4_duckdb(conn):
    """
    Q4: Group by id3, avg v1, avg v2, avg v3

    Runs the aggregation as SQL through ``conn`` and materialises the result
    with ``fetchdf()`` so the fetch is part of the timed work.

    Args:
        conn: Object exposing ``execute(sql)`` whose return value has a
            ``fetchdf()`` method (presumably a DuckDB connection on which
            the caller has made a relation named ``df`` visible -- confirm
            against the caller).

    Returns:
        Whatever ``time_microseconds`` returns for the timed query callable.
    """

    def run():
        return conn.execute(
            "SELECT id3, AVG(v1) as v1_avg, AVG(v2) as v2_avg, AVG(v3) as v3_avg FROM df GROUP BY id3"
        ).fetchdf()

    return time_microseconds(run)
281+
245282 @staticmethod
246283 def benchmark_q4_native_rayforce (table_name ):
247284 """
@@ -253,18 +290,15 @@ def benchmark_q4_native_rayforce(table_name):
253290
254291 if isinstance (result , dict ) and "time" in result :
255292 return result ["time" ] * 1000 , result
256- elif isinstance (result , (int , float )):
293+ if isinstance (result , (int , float )):
257294 return result * 1000 , result
258- elif hasattr (result , "to_python" ):
295+ if hasattr (result , "to_python" ):
259296 value = result .to_python ()
260297 return value * 1000 , result
261- elif hasattr (result , "value" ):
298+ if hasattr (result , "value" ):
262299 value = result .value
263300 return value * 1000 , result
264- else :
265- raise BenchmarkError (
266- f"rayforce runtime returned unsupported measure: { type (result )} "
267- )
301+ raise BenchmarkError (f"rayforce runtime returned unsupported measure: { type (result )} " )
268302
269303
270304class Q5 :
@@ -294,11 +328,7 @@ def benchmark_q5_pandas(df):
294328 """
295329
296330 def run ():
297- return (
298- df .groupby ("id3" )
299- .agg ({"v1" : "sum" , "v2" : "sum" , "v3" : "sum" })
300- .reset_index ()
301- )
331+ return df .groupby ("id3" ).agg ({"v1" : "sum" , "v2" : "sum" , "v3" : "sum" }).reset_index ()
302332
303333 return time_microseconds (run )
304334
@@ -312,11 +342,24 @@ def run():
312342 return df .group_by ("id3" ).agg (
313343 pl .col ("v1" ).sum ().alias ("v1_sum" ),
314344 pl .col ("v2" ).sum ().alias ("v2_sum" ),
315- pl .col ("v3" ).sum ().alias ("v3_sum" )
345+ pl .col ("v3" ).sum ().alias ("v3_sum" ),
316346 )
317347
318348 return time_microseconds (run )
319349
@staticmethod
def benchmark_q5_duckdb(conn):
    """
    Q5: Group by id3, sum v1, sum v2, sum v3

    Runs the aggregation as SQL through ``conn`` and materialises the result
    with ``fetchdf()`` so the fetch is part of the timed work.

    Args:
        conn: Object exposing ``execute(sql)`` whose return value has a
            ``fetchdf()`` method (presumably a DuckDB connection on which
            the caller has made a relation named ``df`` visible -- confirm
            against the caller).

    Returns:
        Whatever ``time_microseconds`` returns for the timed query callable.
    """

    def run():
        return conn.execute(
            "SELECT id3, SUM(v1) as v1_sum, SUM(v2) as v2_sum, SUM(v3) as v3_sum FROM df GROUP BY id3"
        ).fetchdf()

    return time_microseconds(run)
362+
320363 @staticmethod
321364 def benchmark_q5_native_rayforce (table_name ):
322365 """
@@ -328,18 +371,15 @@ def benchmark_q5_native_rayforce(table_name):
328371
329372 if isinstance (result , dict ) and "time" in result :
330373 return result ["time" ] * 1000 , result
331- elif isinstance (result , (int , float )):
374+ if isinstance (result , (int , float )):
332375 return result * 1000 , result
333- elif hasattr (result , "to_python" ):
376+ if hasattr (result , "to_python" ):
334377 value = result .to_python ()
335378 return value * 1000 , result
336- elif hasattr (result , "value" ):
379+ if hasattr (result , "value" ):
337380 value = result .value
338381 return value * 1000 , result
339- else :
340- raise BenchmarkError (
341- f"rayforce runtime returned unsupported measure: { type (result )} "
342- )
382+ raise BenchmarkError (f"rayforce runtime returned unsupported measure: { type (result )} " )
343383
344384
345385class Q6 :
@@ -351,9 +391,7 @@ def benchmark_q6_rayforce(table):
351391
352392 def run ():
353393 return (
354- table .select (
355- range_v1_v2 = (Column ("v1" ).max () - Column ("v2" ).min ())
356- )
394+ table .select (range_v1_v2 = (Column ("v1" ).max () - Column ("v2" ).min ()))
357395 .by ("id3" )
358396 .execute ()
359397 )
@@ -386,29 +424,41 @@ def run():
386424
387425 return time_microseconds (run )
388426
@staticmethod
def benchmark_q6_duckdb(conn):
    """
    Q6: Group by id3, max(v1) - min(v2)

    Runs the aggregation as SQL through ``conn`` and materialises the result
    with ``fetchdf()`` so the fetch is part of the timed work.

    Args:
        conn: Object exposing ``execute(sql)`` whose return value has a
            ``fetchdf()`` method (presumably a DuckDB connection on which
            the caller has made a relation named ``df`` visible -- confirm
            against the caller).

    Returns:
        Whatever ``time_microseconds`` returns for the timed query callable.
    """

    def run():
        return conn.execute(
            "SELECT id3, MAX(v1) - MIN(v2) as range_v1_v2 FROM df GROUP BY id3"
        ).fetchdf()

    return time_microseconds(run)
439+
@staticmethod
def benchmark_q6_native_rayforce(table_name):
    """
    Q6: Group by id3, max(v1) - min(v2)

    Builds a ``timeit``-wrapped select expression for ``table_name``,
    evaluates it with ``eval_str`` and reads the timing out of the result.

    Args:
        table_name: Name interpolated after ``from:`` in the query string.

    Returns:
        A ``(scaled_time, result)`` tuple, where ``scaled_time`` is the
        measurement multiplied by 1000 and ``result`` is the raw value from
        ``eval_str``. (The factor presumably converts milliseconds to
        microseconds -- confirm against the runtime's ``timeit`` units.)

    Raises:
        BenchmarkError: If the result is none of the supported shapes.
    """

    # Doubled braces are literal braces in the f-string; only table_name is
    # interpolated.
    query = (
        f"(timeit (select {{range_v1_v2: (- (max v1) (min v2)) by: id3 from: {table_name}}}))"
    )
    result = eval_str(query)

    # The result shape is probed from most to least specific; each guard
    # returns as soon as a timing can be read.
    if isinstance(result, dict) and "time" in result:
        return result["time"] * 1000, result
    if isinstance(result, (int, float)):
        return result * 1000, result
    if hasattr(result, "to_python"):
        value = result.to_python()
        return value * 1000, result
    if hasattr(result, "value"):
        value = result.value
        return value * 1000, result
    raise BenchmarkError(f"rayforce runtime returned unsupported measure: {type(result)}")
412462
413463
414464benchmarks = [
@@ -417,41 +467,47 @@ def benchmark_q6_native_rayforce(table_name):
417467 Q1 .benchmark_q1_rayforce ,
418468 Q1 .benchmark_q1_pandas ,
419469 Q1 .benchmark_q1_polars ,
470+ Q1 .benchmark_q1_duckdb ,
420471 Q1 .benchmark_q1_native_rayforce ,
421472 ),
422473 (
423474 "Q2: Group by id1, id2, sum v1" ,
424475 Q2 .benchmark_q2_rayforce ,
425476 Q2 .benchmark_q2_pandas ,
426477 Q2 .benchmark_q2_polars ,
478+ Q2 .benchmark_q2_duckdb ,
427479 Q2 .benchmark_q2_native_rayforce ,
428480 ),
429481 (
430482 "Q3: Group by id3, sum v1, avg v3" ,
431483 Q3 .benchmark_q3_rayforce ,
432484 Q3 .benchmark_q3_pandas ,
433485 Q3 .benchmark_q3_polars ,
486+ Q3 .benchmark_q3_duckdb ,
434487 Q3 .benchmark_q3_native_rayforce ,
435488 ),
436489 (
437490 "Q4: Group by id3, avg v1, v2, v3" ,
438491 Q4 .benchmark_q4_rayforce ,
439492 Q4 .benchmark_q4_pandas ,
440493 Q4 .benchmark_q4_polars ,
494+ Q4 .benchmark_q4_duckdb ,
441495 Q4 .benchmark_q4_native_rayforce ,
442496 ),
443497 (
444498 "Q5: Group by id3, sum v1, v2, v3" ,
445499 Q5 .benchmark_q5_rayforce ,
446500 Q5 .benchmark_q5_pandas ,
447501 Q5 .benchmark_q5_polars ,
502+ Q5 .benchmark_q5_duckdb ,
448503 Q5 .benchmark_q5_native_rayforce ,
449504 ),
450505 (
451506 "Q6: Group by id3, max(v1) - min(v2)" ,
452507 Q6 .benchmark_q6_rayforce ,
453508 Q6 .benchmark_q6_pandas ,
454509 Q6 .benchmark_q6_polars ,
510+ Q6 .benchmark_q6_duckdb ,
455511 Q6 .benchmark_q6_native_rayforce ,
456512 ),
457513]
0 commit comments