Skip to content

Bug: reads seem slow #353

@euri10

Description

@euri10

Description

see this totally llm engineered bench, seems like we may have a problem on reads, see the Read-heavy workload:

ROWS=10,000, READ_QUERIES=5,000, WAL=off


=== FILE-BACKED SQLITE ===

Session / connection cost
sqlite3 connect()                        0.00002s
sqlspec provide_session()                0.00001s
sqlalchemy engine.connect()              0.00001s
Slowdowns vs sqlite3: sqlspec 0.55x, sqlalchemy 0.65x

Write-heavy workload
sqlite3 writes                           0.00316s
sqlspec writes                           0.00511s
sqlalchemy core writes                   0.01225s
Slowdowns vs sqlite3: sqlspec 1.62x, sqlalchemy 3.88x

Read-heavy workload
sqlite3 reads                            0.00575s
sqlspec reads                            0.21776s
sqlalchemy core reads                    0.03657s
Slowdowns vs sqlite3: sqlspec 37.84x, sqlalchemy 6.36x


=== IN-MEMORY SQLITE ===

Session / connection cost
sqlite3 connect()                        0.00002s
sqlspec provide_session()                0.00001s
sqlalchemy engine.connect()              0.00001s
Slowdowns vs sqlite3: sqlspec 0.34x, sqlalchemy 0.37x

Write-heavy workload
sqlite3 writes                           0.00310s
sqlspec writes                           0.00492s
sqlalchemy core writes                   0.01291s
Slowdowns vs sqlite3: sqlspec 1.59x, sqlalchemy 4.16x

Read-heavy workload
sqlite3 reads                            0.00337s
sqlspec reads                            0.22856s
sqlalchemy core reads                    0.03510s
Slowdowns vs sqlite3: sqlspec 67.79x, sqlalchemy 10.41x

URL to code causing the issue

No response

MCVE

import time
import sqlite3
import tempfile
from pathlib import Path

from sqlspec import SQLSpec
from sqlspec.adapters.sqlite import SqliteConfig

from sqlalchemy import create_engine, text


# =========================
# Config
# =========================
ROWS = 10_000
READ_QUERIES = 5_000
RUNS = 5
ENABLE_WAL = False


# =========================
# Timing helpers
# =========================
def timeit(fn):
    start = time.perf_counter()
    fn()
    return time.perf_counter() - start


def run(label, fn):
    times = []
    fn()  # warmup
    for _ in range(RUNS):
        times.append(timeit(fn))
    avg = sum(times) / len(times)
    print(f"{label:<40} {avg:.5f}s")
    return avg


# =========================
# Setup helpers
# =========================
def sqlite_setup(conn):
    cur = conn.cursor()
    if ENABLE_WAL:
        cur.execute("pragma journal_mode=wal")
    cur.execute(
        "create table if not exists notes (id integer primary key, body text)"
    )
    conn.commit()


def sqlspec_setup(session):
    if ENABLE_WAL:
        session.execute("pragma journal_mode=wal")
    session.execute(
        "create table if not exists notes (id integer primary key, body text)"
    )


def populate_sqlite(conn):
    cur = conn.cursor()
    cur.executemany(
        "insert into notes (body) values (?)",
        [(f"note {i}",) for i in range(ROWS)],
    )
    conn.commit()


def populate_sqlspec(session):
    session.execute_many(
        "insert into notes (body) values (?)",
        [(f"note {i}",) for i in range(ROWS)],
    )


def populate_sqlalchemy(engine):
    with engine.begin() as conn:
        conn.execute(
            text(
                "create table if not exists notes "
                "(id integer primary key, body text)"
            )
        )
        conn.execute(
            text("insert into notes (body) values (:body)"),
            [{"body": f"note {i}"} for i in range(ROWS)],
        )


# =========================
# Session / connection cost
# =========================
def bench_sqlite_connect(db_url):
    def fn():
        conn = sqlite3.connect(db_url)
        conn.close()
    return fn


def bench_sqlspec_session(db_url):
    spec = SQLSpec()
    config = spec.add_config(
        SqliteConfig(connection_config={"database": db_url})
    )

    def fn():
        with spec.provide_session(config):
            pass

    return fn


def bench_sqlalchemy_connect(db_url):
    engine = create_engine(f"sqlite:///{db_url}")

    def fn():
        with engine.connect():
            pass

    return fn


# =========================
# Write-heavy workloads
# =========================
def bench_writes_sqlite(db_url):
    def fn():
        conn = sqlite3.connect(db_url)
        sqlite_setup(conn)
        populate_sqlite(conn)
        conn.close()
    return fn


def bench_writes_sqlspec(db_url):
    spec = SQLSpec()
    config = spec.add_config(
        SqliteConfig(connection_config={"database": db_url})
    )

    def fn():
        with spec.provide_session(config) as session:
            sqlspec_setup(session)
            populate_sqlspec(session)
    return fn


def bench_writes_sqlalchemy(db_url):
    engine = create_engine(f"sqlite:///{db_url}")
    return lambda: populate_sqlalchemy(engine)


# =========================
# Read-heavy workloads
# =========================
def bench_reads_sqlite(db_url):
    conn = sqlite3.connect(db_url)
    sqlite_setup(conn)
    populate_sqlite(conn)
    cur = conn.cursor()

    def fn():
        for i in range(READ_QUERIES):
            cur.execute(
                "select body from notes where id = ?",
                ((i % ROWS) + 1,),
            )
            cur.fetchone()

    return fn


def bench_reads_sqlspec(db_url):
    spec = SQLSpec()
    config = spec.add_config(
        SqliteConfig(connection_config={"database": db_url})
    )

    session_cm = spec.provide_session(config)
    session = session_cm.__enter__()
    sqlspec_setup(session)
    populate_sqlspec(session)

    def fn():
        for i in range(READ_QUERIES):
            session.execute(
                "select body from notes where id = ?",
                ((i % ROWS) + 1,),
            ).one()

    return fn


def bench_reads_sqlalchemy(db_url):
    engine = create_engine(f"sqlite:///{db_url}")
    populate_sqlalchemy(engine)

    stmt = text("select body from notes where id = :id")
    conn = engine.connect()

    def fn():
        for i in range(READ_QUERIES):
            conn.execute(
                stmt,
                {"id": (i % ROWS) + 1},
            ).fetchone()

    return fn

# =========================
# Benchmark runner (with relative slowdown)
# =========================
def run_suite(title, db_url):
    print(f"\n=== {title} ===\n")

    # -----------------
    # Session / connection cost
    # -----------------
    print("Session / connection cost")
    baseline = run("sqlite3 connect()", bench_sqlite_connect(db_url))
    sqlspec_time = run("sqlspec provide_session()", bench_sqlspec_session(db_url))
    sa_time = run("sqlalchemy engine.connect()", bench_sqlalchemy_connect(db_url))

    print(
        f"Slowdowns vs sqlite3: sqlspec {sqlspec_time/baseline:.2f}x, "
        f"sqlalchemy {sa_time/baseline:.2f}x\n"
    )

    # -----------------
    # Write-heavy workload
    # -----------------
    print("Write-heavy workload")
    baseline = run("sqlite3 writes", bench_writes_sqlite(db_url))
    sqlspec_time = run("sqlspec writes", bench_writes_sqlspec(db_url))
    sa_time = run("sqlalchemy core writes", bench_writes_sqlalchemy(db_url))

    print(
        f"Slowdowns vs sqlite3: sqlspec {sqlspec_time/baseline:.2f}x, "
        f"sqlalchemy {sa_time/baseline:.2f}x\n"
    )

    # -----------------
    # Read-heavy workload
    # -----------------
    print("Read-heavy workload")
    baseline = run("sqlite3 reads", bench_reads_sqlite(db_url))
    sqlspec_time = run("sqlspec reads", bench_reads_sqlspec(db_url))
    sa_time = run("sqlalchemy core reads", bench_reads_sqlalchemy(db_url))

    print(
        f"Slowdowns vs sqlite3: sqlspec {sqlspec_time/baseline:.2f}x, "
        f"sqlalchemy {sa_time/baseline:.2f}x\n"
    )

# =========================
# Main
# =========================
if __name__ == "__main__":
    print(
        f"\nROWS={ROWS:,}, READ_QUERIES={READ_QUERIES:,}, "
        f"WAL={'on' if ENABLE_WAL else 'off'}\n"
    )

    with tempfile.TemporaryDirectory() as d:
        run_suite("FILE-BACKED SQLITE", str(Path(d) / "bench.db"))

    run_suite("IN-MEMORY SQLITE", ":memory:")

Steps to reproduce

1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

Screenshots

"In the format of: ![SCREENSHOT_DESCRIPTION](SCREENSHOT_LINK.png)"

Logs

Package Version

0.38.4

Platform

  • Linux
  • Mac
  • Windows
  • Other (Please specify in the description above)

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions